Source code for maxsmooth.chidist_plotter

"""

This function allows the user to produce plots of the chi squared
distribution as a function of the available discrete sign spaces for the
constrained derivatives. This can be used to identify whether or not the
problem is `ill defined`, see the ``maxsmooth`` paper for a definition,
and if it can be solved using the sign sampling approach.

It can also be used to determine whether or not the 'cap' and maximum allowed
increase on the value of chi squared during the directional exploration
are sufficient to identify the global minimum for the problem.

The function is reliant on the output of the ``maxsmooth`` smooth() function.
The required outputs can be saved when running smooth()
using the 'data_save = True' kwarg.
"""

import numpy as np
import os
from itertools import product
import matplotlib.pyplot as plt


[docs]class chi_plotter(object):

    r"""

    **Parameters:**

        N: **int**
            | The number of terms in the DCF.

    **Kwargs:**

        fit_type: **Default = 'qp-sign_flipping'**
            | This kwarg is the same as for the smooth() function.
                Here it allows the files to be read from the base
                directory.

        base_dir: **Default = 'Fitted_Output/'**
            | The location of the outputted
                data from ``maxsmooth``. This must be a string and end in '/'
                and must contain the files 'Output_Evaluations/' and
                'Output_Signs/' which can be obtained by running smooth() with
                data_save=True.

        chi: **Default = None else list or numpy array**
            | A list of
                chi squared evaluations. If provided then this is used
                over outputted data in the base directory. It must have the
                same length as the ouputted signs in the file 'Output_Signs/'
                in the base directory. It must also be ordered correctly
                otherwise the returned graph will not be correct. A correct
                ordering is one for which each entry in the array corresponds
                to the correct sign combination in 'Output_Signs/'.
                Typically this will not be needed but if the chi squared
                evaluation in 'Output_Evaluations/' in the base directory
                is not in the desired parameter space this can be useful.
                For example the built in logarithmic model calculates
                chi squared in logarithmic space. To plot the distribution
                in linear space we can calculate
                chi squared in linear space using a function for the model
                and the tested parameters which are found in
                'Output_Parameters/' in the base directory.

        **constraints: Default = 2 else an integer less than or equal**
        **to N - 1**
            | The minimum constrained derivative order which is set by default
                to 2 for a Maximally Smooth Function. Used here to determine
                the number of possible sign combinations available.

        zero_crossings: **Default = None else list of integers**
            | Allows you to
                specify if the conditions should be relaxed on any
                of the derivatives between constraints and the highest order
                derivative. e.g. a 6th order fit with just a constrained 2nd
                and 3rd order derivative would have a zero_crossings = [4, 5].
                Again this is used in determining the possible sign
                combinations available.

        plot_limits: **Default = False**
            | Determines whether the limits on
                the directional exploration are plotted on top of the
                chi squared distribution.

        cap: **Default = (len(available_signs)//N) + N else an integer**
            | Determines the maximum number of signs explored either side of
                the minimum chi squared value found after the
                decent algorithm has terminated when running smooth(). Here
                it is used when plot_limits=True.

        chi_squared_limit: **Default = 2 else float or int**
            | The prefactor on the maximum allowed increase in chi squared
                during the directional exploration which is defaulted at 2.
                If this value multiplied by the minimum chi squared
                value found after the descent algorithm is exceeded then the
                exploration in one direction is stopped and started in the
                other. For more details on this and 'cap' see the ``maxsmooth``
                paper. Again this is used here
                when plot_limits=True.

    """
    def __init__(self, N, **kwargs):

        self.N = N
        if self.N % 1 != 0:
            raise ValueError('N must be an integer or whole number float.')

        for keys, values in kwargs.items():
            if keys not in set([
                    'chi', 'base_dir',
                    'zero_crossings', 'constraints',
                    'fit_type', 'chi_squared_limit', 'cap', 'plot_limits']):
                raise KeyError("Unexpected keyword argument in chi_plotter().")

        self.base_dir = kwargs.pop('base_dir', 'Fitted_Output/')
        if type(self.base_dir) is not str:
            raise KeyError("'base_dir' must be a string ending in '/'.")
        elif self.base_dir.endswith('/') is False:
            raise KeyError("'base_dir' must end in '/'.")

        if not os.path.exists(self.base_dir):
            raise Exception(
                "'base_dir' must exist and contain the outputted"
                + " evaluations and sign combinations from a maxsmooth fit."
                + " These can be obtained by running maxsmooth with"
                + " 'data_save=True'.")
        else:
            if not os.path.exists(self.base_dir + 'Output_Evaluation/'):
                raise Exception(
                    "No 'Output_Evaluation/' directory found in"
                    + " 'base_dir'.")
            if not os.path.exists(self.base_dir + 'Output_Signs/'):
                raise Exception(
                    "No 'Output_Signs/' directory found in"
                    + " 'base_dir'.")

        self.chi = kwargs.pop('chi', None)

        self.constraints = kwargs.pop('constraints', 2)
        if type(self.constraints) is not int:
            raise TypeError("'constraints' is not an integer")
        if self.constraints > self.N-1:
            raise ValueError(
                "'constraints' exceeds the number" +
                " of derivatives.")

        self.zero_crossings = kwargs.pop('zero_crossings', None)
        if self.zero_crossings is not None:
            for i in range(len(self.zero_crossings)):
                if type(self.zero_crossings[i]) is not int:
                    raise TypeError(
                        "Entries in 'zero_crossings'" +
                        " are not integer.")
                if self.zero_crossings[i] < self.constraints:
                    raise ValueError(
                        'One or more specified derivatives for' +
                        ' zero crossings is less than the minimum' +
                        ' constrained' +
                        ' derivative.\n zero_crossings = '
                        + str(self.zero_crossings)
                        + '\n' + ' Minimum Constrained Derivative = '
                        + str(self.constraints))

        self.fit_type = kwargs.pop('fit_type', 'qp-sign_flipping')
        if self.fit_type not in set(['qp', 'qp-sign_flipping']):
            raise KeyError(
                "Invalid 'fit_type'. Valid entries include 'qp'\n" +
                "'qp-sign_flipping'")

        self.chi_squared_limit = kwargs.pop('chi_squared_limit', None)
        self.cap = kwargs.pop('cap', None)
        if self.chi_squared_limit is not None:
            if isinstance(self.chi_squared_limit, int) or \
                    isinstance(self.chi_squared_limit, float):
                pass
            else:
                raise TypeError(
                    "Limit on maximum allowed increase in chi squared" +
                    ", 'chi_squared_limit', is not an integer or float.")
        if self.cap is not None:
            if type(self.cap) is not int:
                raise TypeError(
                    "The cap on directional exploration" +
                    ", 'cap', is not an integer.")

        self.plot_limits = kwargs.pop('plot_limits', False)
        if type(self.plot_limits) is not bool:
            raise TypeError(
                "Boolean keyword argument with value "
                + " 'plot_limits' is not True or False.")

        self.plot()

    def plot(self):

        def signs_array(nums):
            return np.array(list(product(*((x, -x) for x in nums))))

        if self.zero_crossings is not None:
            possible_signs = signs_array([1]*(
                self.N-self.constraints-len(self.zero_crossings)))
        else:
            possible_signs = signs_array([1]*(self.N-self.constraints))

        plt.figure()
        j = np.arange(0, len(possible_signs), 1)
        if self.chi is None:
            chi = np.loadtxt(
                self.base_dir + 'Output_Evaluation/'
                + str(self.N) + '_' + str(self.fit_type) + '.txt')
            signs = np.loadtxt(
                self.base_dir + 'Output_Signs/'
                + str(self.N) + '_' + str(self.fit_type) + '.txt')
            if len(signs) != len(possible_signs):
                index = []
                for p in range(len(signs)):
                    for i in range(len(possible_signs)):
                        if np.all(signs[p] == possible_signs[i]):
                            index.append(i)
                index, chi = zip(*sorted(zip(index, chi)))
                plt.plot(index, chi, ls='-')
            else:
                plt.plot(j, chi, marker='.', ls='-')
        else:
            chi = self.chi
            signs = np.loadtxt(
                self.base_dir + 'Output_Signs/'
                + str(self.N) + '_' + str(self.fit_type) + '.txt')
            if len(signs) != len(possible_signs):
                index = []
                for p in range(len(signs)):
                    for i in range(len(possible_signs)):
                        if np.all(signs[p] == possible_signs[i]):
                            index.append(i)
                index, chi = zip(*sorted(zip(index, chi)))
                plt.plot(index, chi, marker='.', ls='-')
            else:
                plt.plot(j, chi, marker='.', ls='-')

        if self.cap is None:
            self.cap = (len(possible_signs)//self.N) + self.N
        if self.chi_squared_limit is None:
            self.chi_squared_limit = 2*min(chi)

        for i in range(len(chi)):
            if chi[i] == min(chi):
                plt.plot(i, chi[i], marker='*')
                if self.plot_limits is True:
                    plt.vlines(
                        i + self.cap, min(chi), max(chi), ls='--',
                        label='Cap On Exp.', color='k', alpha=0.5)
                    plt.vlines(
                        i - self.cap,  min(chi), max(chi),
                        ls='--', color='k', alpha=0.5)
        if self.plot_limits is True:
            min_chi = np.load(
                self.base_dir + str(self.N) +
                '_'+self.fit_type+'_minimum_chi_post_descent.npy')
            plt.hlines(
                self.chi_squared_limit*min_chi, 0, len(possible_signs),
                ls='-.', label=r'Max. Increase\n' +
                ' in $\chi^2$',   # noqa: W605
                color='k', alpha=0.5)
        plt.xlim([j[0], j[-1]])
        plt.grid()
        plt.yscale('log')
        plt.ylabel(r'$\chi^2$')
        plt.xlabel('Sign Combination')
        plt.tight_layout()
        plt.savefig(self.base_dir + 'chi_distribution.pdf')
        plt.close()