# Source code for ugtm.ugtm_classes

"""Defines classes for initial and optimized GTM model.
"""
# Authors: Helena A. Gaspar <hagax8@gmail.com>
# License: MIT

from __future__ import print_function
import numpy as np



# [docs] (documentation-link marker left over from the rendered source page)
class ReturnU(object):
    """Lightweight container holding ``matU`` together with ``betaInv``.

    Attributes
    ----------
    matU : object
        Value passed as ``matU``; stored unchanged.
        (presumably a matrix -- shape is not visible here; TODO confirm)
    betaInv : object
        Value passed as ``betaInv``; stored unchanged.
        (elsewhere in this module ``betaInv`` denotes the noise variance)
    """

    def __init__(self, matU, betaInv):
        """Store both arguments as attributes, without copying them."""
        self.matU, self.betaInv = matU, betaInv




# [docs] (documentation-link marker left over from the rendered source page)
class InitialGTM(object):
    r"""Container for a GTM model before optimization.

    The class only stores the values it is given; nothing is computed here.

    Attributes
    ----------
    matX : array of shape (n_nodes, 2)
        Node coordinates; the nodes form a grid in the 2D space.
    matM : array of shape (n_rbf_centers, 2)
        Coordinates of the radial basis function (RBF) centers,
        which also form a grid in the 2D space.
    n_nodes : int
        Number of grid nodes in the 2D space.
    n_rbf_centers : int
        Number of RBF centers.
    rbfWidth : float
        Initial RBF width, set to the average of the minimum distance
        between RBF centers scaled by the GTM hyperparameter s:
        :math:`rbfWidth=\sigma \times average(\mathbf{distances(rbf)}_{min})`,
        with :math:`\sigma` standing for s.
        NB: with s = 0 (not recommended), rbfWidth is instead the
        maximum distance between RBF centers.
    matPhiMPlusOne : array of shape (n_nodes, n_rbf_centers+1)
        RBF matrix with one extra dimension holding the bias term.
    matW : array of shape (n_dimensions, n_rbf_centers+1)
        Parameter matrix (PCA-initialized).
    matY : array of shape (n_dimensions, n_nodes)
        Manifold in the n-dimensional data space, i.e. the projection
        of matX into data space. Each point matY[:,i] is the center of a
        Gaussian component in data space.
        :math:`\mathbf{Y}=\mathbf{W}\mathbf{\Phi}^T`
    betaInv : float
        Noise variance of the data distribution
        (:math:`\beta^{-1}` in the original paper).
        Initialized as the larger of:
        (1) the 3rd eigenvalue of the data covariance matrix,
        (2) half the average distance between Gaussian component centers
        in the data space (matY matrix).
    n_dimensions : int
        Dimensionality of the data space (number of variables).
    """

    def __init__(self, matX, matM, n_nodes, n_rbf_centers, rbfWidth,
                 matPhiMPlusOne, matW, matY, betaInv, n_dimensions):
        r"""Store the components of an initial GTM model.

        Every argument is kept, unchanged, as the attribute of the same
        name; see the class docstring for the full description of each.

        Parameters
        ----------
        matX : array of shape (n_nodes, 2)
            Node coordinates in the 2D space.
        matM : array of shape (n_rbf_centers, 2)
            RBF center coordinates in the 2D space.
        n_nodes : int
            Number of grid nodes.
        n_rbf_centers : int
            Number of RBF centers.
        rbfWidth : float
            Initial RBF width.
        matPhiMPlusOne : array of shape (n_nodes, n_rbf_centers+1)
            RBF matrix including the bias column.
        matW : array of shape (n_dimensions, n_rbf_centers+1)
            Parameter matrix (PCA-initialized).
        matY : array of shape (n_dimensions, n_nodes)
            Manifold in data space,
            :math:`\mathbf{Y}=\mathbf{W}\mathbf{\Phi}^T`.
        betaInv : float
            Noise variance (:math:`\beta^{-1}`).
        n_dimensions : int
            Dimensionality of the data space.
        """
        # Latent-space grids: nodes and RBF centers.
        self.matX, self.matM = matX, matM
        # Grid sizes.
        self.n_rbf_centers, self.n_nodes = n_rbf_centers, n_nodes
        # Basis functions: width and evaluated matrix (with bias column).
        self.rbfWidth, self.matPhiMPlusOne = rbfWidth, matPhiMPlusOne
        # Mapping parameters and the resulting manifold in data space.
        self.matW, self.matY = matW, matY
        # Noise variance and data-space dimensionality.
        self.betaInv, self.n_dimensions = betaInv, n_dimensions




# [docs] (documentation-link marker left over from the rendered source page)
class OptimizedGTM(object):
    r"""Class for optimized GTM model.

    Attributes
    ----------
    matX : array of shape (n_nodes, 2)
        Coordinates of nodes defining a grid in the 2D space.
    matW : array of shape (n_dimensions, n_rbf_centers+1)
        Parameter matrix (PCA-initialized).
    matY : array of shape (n_dimensions, n_nodes)
        Manifold in n-dimensional space (projection of matX in data space).
        matY = np.dot(matW, np.transpose(matPhiMPlusOne))
    matP : array of shape (n_individuals, n_nodes)
        Data distribution with variance betaInv.
    matR : array of shape (n_individuals, n_nodes)
        Responsibilities (posterior probabilities),
        used to compute data representations:
        means (matMeans) and modes (matModes).
        Responsibilities are the main output of GTM.
        matR[i,:] represents the responsibility vector for an instance i.
        The columns in matR correspond to rows in matX (nodes).
    betaInv : float
        Noise variance parameter for the data distribution.
        Written as :math:`\beta^{-1}` in the original paper.
    matMeans : array of shape (n_individuals, 2)
        Data representation in 2D space: means (most commonly used for GTM).
    matModes : array of shape (n_individuals, 2)
        Data representation in 2D space: modes
        (for each instance, coordinate with highest responsibility).
    n_dimensions : int
        Data space dimensionality (number of variables).
    converged : bool
        True if the model has converged; otherwise False.
    """

    def __init__(self, matW, matY, matP, matR, betaInv, matMeans,
                 matModes, matX, n_dimensions, converged):
        r"""Constructor for OptimizedGTM class.

        Each argument is stored, unchanged, as the attribute of the
        same name.

        Parameters
        ----------
        matW : array of shape (n_dimensions, n_rbf_centers+1)
            Parameter matrix (PCA-initialized).
        matY : array of shape (n_dimensions, n_nodes)
            Manifold in n-dimensional space (projection of matX in data space).
            matY = np.dot(matW, np.transpose(matPhiMPlusOne))
        matP : array of shape (n_individuals, n_nodes)
            Data distribution with variance betaInv.
        matR : array of shape (n_individuals, n_nodes)
            Responsibilities (posterior probabilities),
            used to compute data representations:
            means (matMeans) and modes (matModes).
            matR[i,:] represents the responsibility vector for an instance i.
            The columns in matR correspond to rows in matX (nodes).
        betaInv : float
            Noise variance parameter for the data distribution.
            Written as :math:`\beta^{-1}` in the original paper.
        matMeans : array of shape (n_individuals, 2)
            Data representation in 2D space: means (most commonly used for GTM).
        matModes : array of shape (n_individuals, 2)
            Data representation in 2D space: modes
            (for each instance, coordinate with highest responsibility).
        matX : array of shape (n_nodes, 2)
            Coordinates of nodes defining a grid in the 2D space.
        n_dimensions : int
            Data space dimensionality (number of variables).
        converged : bool
            True if the model has converged; otherwise False.
        """
        self.matW = matW
        self.matY = matY
        self.matP = matP
        self.matR = matR
        self.betaInv = betaInv
        self.matMeans = matMeans
        self.matModes = matModes
        self.matX = matX
        self.n_dimensions = n_dimensions
        self.converged = converged

    def _save_map_files(self, output):
        """Save responsibilities, means and modes as comma-separated files."""
        np.savetxt(fname=output+"_responsibilities.csv",
                   X=self.matR, delimiter=",")
        np.savetxt(fname=output+"_coordinates.csv",
                   X=self.matMeans, delimiter=",")
        np.savetxt(fname=output+"_modes.csv", X=self.matModes, delimiter=",")

    @staticmethod
    def _print_map_files(output):
        """Describe the three map files, each line followed by a blank one."""
        print("%s: responsibilities, which represent "
              "each individual's encoding on the map "
              "(dimensions=n_individuals*n_nodes_on_the_map)"
              % (output+"_responsibilities.csv"))
        print("")
        print("%s: coordinates to plot, which represent each individual's "
              "mean position on the map "
              "(dimensions = n_individuals*n_latent_dimensions)"
              % (output+"_coordinates.csv"))
        print("")
        print("%s: modes positions for each individual on the map "
              "(node with max probability for the individual; "
              "dimensions = n_individuals*n_latent_dimensions)"
              % (output+"_modes.csv"))
        print("")

    def write(self, output="output"):
        """Write optimized GTM model: means, modes and responsibilities.

        Parameters
        ----------
        output : str, optional (default = 'output')
            Output path prefix; a suffix is appended for each file.

        Returns
        -------
        None
            Three comma-separated files are written as a side effect:
            (1) ``<output>_coordinates.csv``: means
            (mean position for each data point),
            (2) ``<output>_modes.csv``: modes
            (node with max. responsibility for each data point),
            (3) ``<output>_responsibilities.csv``: responsibilities
            (posterior probabilities for each data point).
            A summary of the written files is printed.
        """
        self._save_map_files(output)
        print("")
        print("Wrote to disk:")
        print("")
        self._print_map_files(output)
        print("")

    def write_all(self, output="output"):
        """Write optimized GTM model and optimized parameters.

        Parameters
        ----------
        output : str, optional (default = 'output')
            Output path prefix; a suffix is appended for each file.

        Returns
        -------
        None
            Six files are written as a side effect:
            (1) ``<output>_coordinates.csv``: means
            (mean position for each data point),
            (2) ``<output>_modes.csv``: modes
            (node with max. responsibility for each data point),
            (3) ``<output>_responsibilities.csv``: responsibilities
            (posterior probabilities for each data point),
            (4) ``<output>_dimensionsAndVariance.csv``: two text lines,
            ``n_dimensions:<value>`` and ``variance:<value>``,
            (5) ``<output>_manifold.csv``: manifold coordinates (matY),
            (6) ``<output>_parametersMatrix.csv``: parameter matrix (matW).
            A summary of the written files is printed.
        """
        self._save_map_files(output)
        np.savetxt(fname=output+"_manifold.csv", X=self.matY, delimiter=",")
        np.savetxt(fname=output+"_parametersMatrix.csv",
                   X=self.matW, delimiter=",")
        # The summary is plain text, not an array: np.savetxt rejects a
        # string (it becomes a 0-d array), so write it directly.
        outparams = "n_dimensions:"+str(self.n_dimensions) + \
                    "\n"+"variance:"+str(self.betaInv)
        with open(output+"_dimensionsAndVariance.csv", "w") as handle:
            handle.write(outparams+"\n")
        print("")
        print("Wrote to disk:")
        print("")
        self._print_map_files(output)
        print("%s: manifold coordinates in the initial data space "
              "(dimensions: n_data_dimensions*n_points_on_manifold)"
              % (output+"_manifold.csv"))
        print("")
        print("%s: parameters matrix"
              % (output+"_parametersMatrix.csv"))
        print("")
        print("%s: initial space and variance"
              % (output+"_dimensionsAndVariance.csv"))
        print("")
        print("")