Source code for maxsmooth.chidist_plotter

"""

This function allows the user to produce plots of the chi squared
distribution as a function of the available discrete sign spaces for the
constrained derivatives. This can be used to identify whether or not the
problem is `ill defined`, see the ``maxsmooth`` paper for a definition,
and if it can be solved using the sign sampling approach.

It can also be used to determine whether or not the 'cap' and maximum allowed
increase on the value of chi squared during the directional exploration
are sufficient to identify the global minimum for the problem.

The function is reliant on the output of the ``maxsmooth`` smooth() function.
The required outputs can be saved when running smooth()
using the 'data_save = True' kwarg.
"""

import numpy as np
import os
from itertools import product
import matplotlib.pyplot as plt


[docs]class chi_plotter(object): r""" **Parameters:** N: **int** | The number of terms in the DCF. **Kwargs:** fit_type: **Default = 'qp-sign_flipping'** | This kwarg is the same as for the smooth() function. Here it allows the files to be read from the base directory. base_dir: **Default = 'Fitted_Output/'** | The location of the outputted data from ``maxsmooth``. This must be a string and end in '/' and must contain the files 'Output_Evaluations/' and 'Output_Signs/' which can be obtained by running smooth() with data_save=True. chi: **Default = None else list or numpy array** | A list of chi squared evaluations. If provided then this is used over outputted data in the base directory. It must have the same length as the ouputted signs in the file 'Output_Signs/' in the base directory. It must also be ordered correctly otherwise the returned graph will not be correct. A correct ordering is one for which each entry in the array corresponds to the correct sign combination in 'Output_Signs/'. Typically this will not be needed but if the chi squared evaluation in 'Output_Evaluations/' in the base directory is not in the desired parameter space this can be useful. For example the built in logarithmic model calculates chi squared in logarithmic space. To plot the distribution in linear space we can calculate chi squared in linear space using a function for the model and the tested parameters which are found in 'Output_Parameters/' in the base directory. **constraints: Default = 2 else an integer less than or equal** **to N - 1** | The minimum constrained derivative order which is set by default to 2 for a Maximally Smooth Function. Used here to determine the number of possible sign combinations available. zero_crossings: **Default = None else list of integers** | Allows you to specify if the conditions should be relaxed on any of the derivatives between constraints and the highest order derivative. e.g. a 6th order fit with just a constrained 2nd and 3rd order derivative would have a zero_crossings = [4, 5]. Again this is used in determining the possible sign combinations available. plot_limits: **Default = False** | Determines whether the limits on the directional exploration are plotted on top of the chi squared distribution. cap: **Default = (len(available_signs)//N) + N else an integer** | Determines the maximum number of signs explored either side of the minimum chi squared value found after the decent algorithm has terminated when running smooth(). Here it is used when plot_limits=True. chi_squared_limit: **Default = 2 else float or int** | The prefactor on the maximum allowed increase in chi squared during the directional exploration which is defaulted at 2. If this value multiplied by the minimum chi squared value found after the descent algorithm is exceeded then the exploration in one direction is stopped and started in the other. For more details on this and 'cap' see the ``maxsmooth`` paper. Again this is used here when plot_limits=True. """ def __init__(self, N, **kwargs): self.N = N if self.N % 1 != 0: raise ValueError('N must be an integer or whole number float.') for keys, values in kwargs.items(): if keys not in set([ 'chi', 'base_dir', 'zero_crossings', 'constraints', 'fit_type', 'chi_squared_limit', 'cap', 'plot_limits']): raise KeyError("Unexpected keyword argument in chi_plotter().") self.base_dir = kwargs.pop('base_dir', 'Fitted_Output/') if type(self.base_dir) is not str: raise KeyError("'base_dir' must be a string ending in '/'.") elif self.base_dir.endswith('/') is False: raise KeyError("'base_dir' must end in '/'.") if not os.path.exists(self.base_dir): raise Exception( "'base_dir' must exist and contain the outputted" + " evaluations and sign combinations from a maxsmooth fit." + " These can be obtained by running maxsmooth with" + " 'data_save=True'.") else: if not os.path.exists(self.base_dir + 'Output_Evaluation/'): raise Exception( "No 'Output_Evaluation/' directory found in" + " 'base_dir'.") if not os.path.exists(self.base_dir + 'Output_Signs/'): raise Exception( "No 'Output_Signs/' directory found in" + " 'base_dir'.") self.chi = kwargs.pop('chi', None) self.constraints = kwargs.pop('constraints', 2) if type(self.constraints) is not int: raise TypeError("'constraints' is not an integer") if self.constraints > self.N-1: raise ValueError( "'constraints' exceeds the number" + " of derivatives.") self.zero_crossings = kwargs.pop('zero_crossings', None) if self.zero_crossings is not None: for i in range(len(self.zero_crossings)): if type(self.zero_crossings[i]) is not int: raise TypeError( "Entries in 'zero_crossings'" + " are not integer.") if self.zero_crossings[i] < self.constraints: raise ValueError( 'One or more specified derivatives for' + ' zero crossings is less than the minimum' + ' constrained' + ' derivative.\n zero_crossings = ' + str(self.zero_crossings) + '\n' + ' Minimum Constrained Derivative = ' + str(self.constraints)) self.fit_type = kwargs.pop('fit_type', 'qp-sign_flipping') if self.fit_type not in set(['qp', 'qp-sign_flipping']): raise KeyError( "Invalid 'fit_type'. Valid entries include 'qp'\n" + "'qp-sign_flipping'") self.chi_squared_limit = kwargs.pop('chi_squared_limit', None) self.cap = kwargs.pop('cap', None) if self.chi_squared_limit is not None: if isinstance(self.chi_squared_limit, int) or \ isinstance(self.chi_squared_limit, float): pass else: raise TypeError( "Limit on maximum allowed increase in chi squared" + ", 'chi_squared_limit', is not an integer or float.") if self.cap is not None: if type(self.cap) is not int: raise TypeError( "The cap on directional exploration" + ", 'cap', is not an integer.") self.plot_limits = kwargs.pop('plot_limits', False) if type(self.plot_limits) is not bool: raise TypeError( "Boolean keyword argument with value " + " 'plot_limits' is not True or False.") self.plot() def plot(self): def signs_array(nums): return np.array(list(product(*((x, -x) for x in nums)))) if self.zero_crossings is not None: possible_signs = signs_array([1]*( self.N-self.constraints-len(self.zero_crossings))) else: possible_signs = signs_array([1]*(self.N-self.constraints)) plt.figure() j = np.arange(0, len(possible_signs), 1) if self.chi is None: chi = np.loadtxt( self.base_dir + 'Output_Evaluation/' + str(self.N) + '_' + str(self.fit_type) + '.txt') signs = np.loadtxt( self.base_dir + 'Output_Signs/' + str(self.N) + '_' + str(self.fit_type) + '.txt') if len(signs) != len(possible_signs): index = [] for p in range(len(signs)): for i in range(len(possible_signs)): if np.all(signs[p] == possible_signs[i]): index.append(i) index, chi = zip(*sorted(zip(index, chi))) plt.plot(index, chi, ls='-') else: plt.plot(j, chi, marker='.', ls='-') else: chi = self.chi signs = np.loadtxt( self.base_dir + 'Output_Signs/' + str(self.N) + '_' + str(self.fit_type) + '.txt') if len(signs) != len(possible_signs): index = [] for p in range(len(signs)): for i in range(len(possible_signs)): if np.all(signs[p] == possible_signs[i]): index.append(i) index, chi = zip(*sorted(zip(index, chi))) plt.plot(index, chi, marker='.', ls='-') else: plt.plot(j, chi, marker='.', ls='-') if self.cap is None: self.cap = (len(possible_signs)//self.N) + self.N if self.chi_squared_limit is None: self.chi_squared_limit = 2*min(chi) for i in range(len(chi)): if chi[i] == min(chi): plt.plot(i, chi[i], marker='*') if self.plot_limits is True: plt.vlines( i + self.cap, min(chi), max(chi), ls='--', label='Cap On Exp.', color='k', alpha=0.5) plt.vlines( i - self.cap, min(chi), max(chi), ls='--', color='k', alpha=0.5) if self.plot_limits is True: min_chi = np.load( self.base_dir + str(self.N) + '_'+self.fit_type+'_minimum_chi_post_descent.npy') plt.hlines( self.chi_squared_limit*min_chi, 0, len(possible_signs), ls='-.', label=r'Max. Increase\n' + ' in $\chi^2$', # noqa: W605 color='k', alpha=0.5) plt.xlim([j[0], j[-1]]) plt.grid() plt.yscale('log') plt.ylabel(r'$\chi^2$') plt.xlabel('Sign Combination') plt.tight_layout() plt.savefig(self.base_dir + 'chi_distribution.pdf') plt.close()