Source code for maxsmooth.best_basis

"""
As demonstrated, this function allows you to test the built in basis and their
ability to
fit the data. It produces a plot that shows chi squared as a function of
:math:`{N}` for the 7 built in models and saves the figure to the base
directory.
"""

import numpy as np
import matplotlib.pyplot as plt
import os
from maxsmooth.DCF import smooth


[docs]class basis_test(object):

    r"""
    **Parameters:**

        x: **numpy.array**
            | The x data points for the set being fitted.

        y: **numpy.array**
            | The y data points for fitting.

    **Kwargs:**

        fit_type: **Default = 'qp-sign_flipping'**
            | This kwarg allows the user to switch between sampling the
                available discrete sign spaces (default)
                or testing all sign combinations on the derivatives which can
                be accessed by setting to 'qp'.

        base_dir: **Default = 'Fitted_Output/'**
            | The location of the outputted
                graph from function. This must be a string and end in '/'. If
                the file does not exist then the function will create it.

        **N: Default = [3, .., 13] in steps of 1 else list or numpy array**
        **of integers**
            | The DCF orders to test each basis function with. In
                some instances the basis function may fail for a given
                :math:`{N}` and higher orders due to overflow/underflow
                errors or ``CVXOPT`` errors.

        **pivot_point: Default = len(x)//2 otherwise an integer between**
        **-len(x) and len(x)**
            | Some of the built in
                models rely on pivot points in the data sets which by defualt
                is set as the middle index. This can be altered via
                this kwarg which can occasionally lead to a better quality fit.

        **constraints: Default = 2 else an integer less than or equal**
        **to N - 1**
            | The minimum constrained derivative order which is set by default
                to 2 for a Maximally Smooth Function.

        zero_crossings: **Default = None else list of integers**
            | Allows you to
                specify if the conditions should be relaxed on any
                of the derivatives between constraints and the highest order
                derivative. e.g. a 6th order fit with just a constrained 2nd
                and 3rd order derivative would have zero_crossings = [4, 5].

        cap: **Default = (len(available_signs)//N) + N else an integer**
            | Determines the maximum number of signs explored either side of
                the minimum :math:`{\chi^2}` value found after the decent
                algorithm has terminated.

        chi_squared_limit: **Default = 2 else float or int**
            | The prefactor on the maximum allowed increase in :math:`{\chi^2}`
                during the directional exploration which is defaulted at 2.
                If this value multiplied by the minimum :math:`{\chi^2}`
                value found after the descent algorithm is exceeded then the
                exploration in one direction is stopped and started in the
                other. For more details on this and 'cap' see the ``maxsmooth``
                paper.

        cvxopt_maxiter: **Default = 10000 else integer**
            | This shouldn't need
                changing for most problems however if ``CVXOPT`` fails with a
                'maxiters reached' error message this can be increased.
                Doing so arbitrarily will however increase the run time of
                ``maxsmooth``.

    """

    def __init__(self, x, y, **kwargs):
        self.x = x
        self.y = y

        for keys, values in kwargs.items():
            if keys not in set([
                    'fit_type', 'base_dir', 'N', 'pivot_point',
                    'constraints', 'zero_crossings', 'chi_squared_limit',
                    'cap', 'cvxopt_maxiter']):
                raise KeyError(
                    "Unexpected keyword argument in basis_test().")

        self.fit_type = kwargs.pop('fit_type', 'qp-sign_flipping')
        if self.fit_type not in set(['qp', 'qp-sign_flipping']):
            raise KeyError(
                "Invalid 'fit_type'. Valid entries include 'qp'\n"
                + "'qp-sign_flipping'")

        self.base_dir = kwargs.pop('base_dir', 'Fitted_Output/')
        if type(self.base_dir) is not str:
            raise KeyError("'base_dir' must be a string ending in '/'.")
        elif self.base_dir.endswith('/') is False:
            raise KeyError("'base_dir' must end in '/'.")

        self.N = kwargs.pop('N', np.arange(3, 14, 1))
        for i in range(len(self.N)):
            if self.N[i] % 1 != 0:
                raise ValueError(
                    'N must be an integer or whole number float.')

        self.pivot_point = kwargs.pop('pivot_point', len(self.x)//2)
        if type(self.pivot_point) is not int:
            raise TypeError('Pivot point is not an integer index.')
        elif self.pivot_point >= len(self.x) or \
                self.pivot_point < -len(self.x):
            raise ValueError(
                'Pivot point must be in the range -len(x) - len(x).')

        self.constraints = kwargs.pop('constraints', 2)
        if type(self.constraints) is not int:
            raise TypeError("'constraints' is not an integer")
        if type(self.N) is list:
            self.N = np.array(self.N)
        if self.constraints >= self.N.min() and \
                self.constraints < self.N.max():
            self.N = np.arange(self.constraints + 1, self.N.max()+1, 1)
        elif self.constraints >= self.N.max():
            raise ValueError(
                "'constraints' exceeds the number of derivatives" +
                " for highest value N provided to the function." +
                " Lower constraints or increase the range of N being" +
                " tested.")

        self.zero_crossings = kwargs.pop('zero_crossings', None)
        if self.zero_crossings is not None:
            for i in range(len(self.zero_crossings)):
                if type(self.zero_crossings[i]) is not int:
                    raise TypeError(
                        "Entries in 'zero_crossings'" +
                        " are not integer.")
                if self.zero_crossings[i] < self.constraints:
                    raise ValueError(
                        'One or more specified derivatives for' +
                        ' zero crossings is less than the minimum' +
                        ' constrained' +
                        ' derivative.\n zero_crossings = ' +
                        str(self.zero_crossings)
                        + '\n' + ' Minimum Constrained Derivative = '
                        + str(self.constraints))

        self.chi_squared_limit = kwargs.pop('chi_squared_limit', None)
        self.cap = kwargs.pop('cap', None)
        if self.chi_squared_limit is not None:
            if isinstance(self.chi_squared_limit, int) or \
                    isinstance(self.chi_squared_limit, float):
                pass
            else:
                raise TypeError(
                    "Limit on maximum allowed increase in chi squared" +
                    ", 'chi_squared_limit', is not an integer or float.")
        if self.cap is not None:
            if type(self.cap) is not int:
                raise TypeError(
                    "The cap on directional exploration" +
                    ", 'cap', is not an integer.")

        self.cvxopt_maxiter = kwargs.pop('cvxopt_maxiter', 10000)
        if type(self.cvxopt_maxiter) is not int:
            raise ValueError("'cvxopt_maxiter' is not integer.")

        self.test()

    def test(self):

        if not os.path.exists(self.base_dir):
            os.mkdir(self.base_dir)

        def fit(model_type):

            chi = []
            N_passed = []
            for i in range(len(self.N)):
                try:

                    result = smooth(
                        self.x, self.y, self.N[i],
                        model_type=model_type,
                        fit_type=self.fit_type, pivot_point=self.pivot_point,
                        constraints=self.constraints,
                        cvxopt_maxiter=self.cvxopt_maxiter,
                        zero_crossings=self.zero_crossings,
                        cap=self.cap, chi_squared_limit=self.chi_squared_limit)

                    if model_type == 'loglog_polynomial':
                        chi.append(np.sum((self.y - result.y_fit)**2))
                    else:
                        chi.append(result.optimum_chi)
                    N_passed.append(self.N[i])
                except Exception:
                    print(
                        'Unable to fit with N = ' + str(self.N[i]) + ' and ' +
                        str(model_type) + '.')

            if chi != []:
                chi = np.array(chi)
                min_chi = chi.min()
                for i in range(len(chi)):
                    if chi[i] == chi.min():
                        best_N = self.N[i]
            else:
                chi = np.nan
                min_chi = np.nan
                N_passed = np.nan
                best_N = np.nan

            return chi, min_chi, N_passed, best_N

        chi_poly, min_chi_poly, N_poly, best_N_poly = fit('polynomial')
        chi_np, min_chi_np, N_np, best_N_np = fit('normalised_polynomial')
        chi_log, min_chi_log, N_log, best_N_log = fit('log_polynomial')
        chi_loglog, min_chi_loglog, N_loglog, best_N_loglog = \
            fit('loglog_polynomial')
        chi_leg, min_chi_leg, N_leg, best_N_leg = fit('legendre')
        chi_exp, min_chi_exp, N_exp, best_N_exp = fit('exponential')
        chi_dif, min_chi_dif, N_dif, best_N_dif = fit('difference_polynomial')

        plt.figure()
        plt.plot(N_np, chi_np, label='Normalised Polynomial', c='b')
        plt.plot(N_poly, chi_poly, label='Polynomial', c='k')
        plt.plot(N_dif, chi_dif, label='Difference Polynomial', c='r')
        plt.plot(N_log, chi_log, label=r'Log Polynomial', c='g')
        plt.plot(N_loglog, chi_loglog, label=r'Log Log Polynomial', c='orange')
        plt.plot(N_leg, chi_leg, label='Legendre', c='purple')
        plt.plot(N_exp, chi_exp, label='Exponential', c='magenta')
        plt.legend()
        plt.xlabel(r'N')
        plt.xticks(self.N)
        plt.ylabel(r'$\chi^2$')
        plt.yscale('log')
        plt.tight_layout()
        plt.savefig(self.base_dir + 'Basis_functions.pdf')
        plt.close()