Source code for WDmodel.fit

# -*- coding: UTF-8 -*-
"""
Core data processing and fitting/sampling routines
"""

from __future__ import absolute_import
from __future__ import print_function
from __future__ import unicode_literals
import sys
import warnings
import numpy as np
import numpy.polynomial.polynomial as poly
import scipy.signal as scisig
from scipy.stats import norm
from iminuit import Minuit
from astropy.constants import c as _C
import emcee
import h5py
import six.moves.cPickle as pickle
from clint.textui import progress
from six.moves import map
from six.moves import range
from six.moves import zip
progress.STREAM = sys.stdout
from . import io
from . import passband
from . import likelihood
from . import mossampler


[docs]def polyfit_continuum(continuumdata, wave):
    """
    Fit a polynomial to the DA white dwarf continuum to normalize it - purely
    for visualization purposes

    Parameters
    ----------
    continuumdata : :py:class:`numpy.recarray`
        The continuum data.
        Must have ``dtype=[('wave', '<f8'), ('flux', '<f8'), ('flux_err', '<f8')]``
        Produced by running the spectrum through
        :py:func:`WDmodel.fit.orig_cut_lines` and extracting the pre-defined
        lines in the :py:class:`WDmodel.WDmodel.WDmodel` instance.
    wave : array-like
        The full spectrum wavelength array on which to interpolate the
        continuum model

    Returns
    -------
    cont_model : :py:class:`numpy.recarray`
        The continuum model
        Must have ``dtype=[('wave', '<f8'), ('flux', '<f8')]``

    Notes
    -----
        Roughly follows the algorithm described by the SDSS SSPP for a global
        continuum fit. Fits a red side and blue side at 5500 A separately to
        get a smooth polynomial representation. The red side uses a degree 5
        polynomial and the blue side uses a degree 9 polynomial. Then "splices"
        them together - I don't actually know how SDSS does this, but we simply
        assert the two bits are the same function - and fits the full continuum
        to a degree 9 polynomial.
    """

    cwave = continuumdata.wave
    cflux = continuumdata.flux
    cdflux = continuumdata.flux_err
    w = 1./cdflux**2.

    # divide the wavelengths into a blue and red side at 5500 Angstroms
    maskblue = (cwave <= 5500.)
    maskred  = (cwave  > 5500.)
    outblue  = ( wave <= 5500.)
    outred   = ( wave  > 5500.)

    mublue = []
    mured  = []
    # fit a degree 9 polynomial to the blueside
    if len(cwave[maskblue]) > 0 and len(wave[outblue]) > 0:
        coeffblue = poly.polyfit(cwave[maskblue], cflux[maskblue], deg=9, w=w[maskblue])
        mublue = poly.polyval(cwave[maskblue], coeffblue)

    # fit a degree 5 polynomial to the redside
    if len(cwave[maskred]) > 0 and len(wave[outred]) > 0:
        coeffred = poly.polyfit(cwave[maskred], cflux[maskred], deg=5, w=w[maskred])
        mured = poly.polyval(cwave[maskred], coeffred)

    # splice the two fits together
    mu = np.hstack((mublue,mured))

    # fit a degree 9 polynomial to the spliced continuum
    coeff = poly.polyfit(cwave, mu, deg=9, w=w)

    # get the continuum model at the requested wavelengths
    out = poly.polyval(wave, coeff)
    names=str("wave,flux")
    cont_model = np.rec.fromarrays([wave, out], names=names)
    return cont_model


[docs]def orig_cut_lines(spec, model):
    """
    Cut out the hydrogen Balmer spectral lines defined in
    :py:class:`WDmodel.WDmodel.WDmodel` from the spectrum.

    The masking of Balmer lines is basic, and not very effective at high
    surface gravity or low temperature, or in the presence of non hydrogen
    lines. It's used to get a roughly masked set of data suitable for continuum
    detection, and is effective in the context of our ground-based
    spectroscopic followup campaign for HST GO 12967 and 13711 programs.

    Parameters
    ----------
    spec : :py:class:`numpy.recarray`
        The spectrum with ``dtype=[('wave', '<f8'), ('flux', '<f8'), ('flux_err', '<f8')]``
    model : :py:class:`WDmodel.WDmodel.WDmodel` instance
        The DA White Dwarf SED model generator

    Returns
    -------
    linedata : :py:class:`numpy.recarray`
        The observations of the spectrum corresponding to the hydrogen Balmer
        lines. 
        Has ``dtype=[('wave', '<f8'), ('flux', '<f8'), ('flux_err', '<f8'), ('line_mask', 'i4'), (line_ind', 'i4')]``
    continuumdata : :py:class:`numpy.recarray`
        The continuum data. Has ``dtype=[('wave', '<f8'), ('flux', '<f8'), ('flux_err', '<f8')]``

    Notes
    -----
        Does a coarse cut to remove hydrogen absorption lines from DA white
        dwarf spectra The line centroids, and widths are fixed and defined with
        the model grid This is insufficient, and particularly at high surface
        gravity and low temperatures the lines are blended. This routine is
        intended to provide a rough starting point for the process of continuum
        determination.
    """

    wave    = spec.wave
    flux    = spec.flux
    fluxerr = spec.flux_err
    balmerwaveindex = {}
    line_wave     = np.array([], dtype='float64', ndmin=1)
    line_flux     = np.array([], dtype='float64', ndmin=1)
    line_fluxerr  = np.array([], dtype='float64', ndmin=1)
    line_number   = np.array([], dtype='int', ndmin=1)
    line_ind      = np.array([], dtype='int', ndmin=1)
    for x in range(1,7):
        W0, ZE = model._get_line_indices(wave, x)
        # save the central wavelengths and spectrum specific indices for each line
        # we don't need this for the fit, but we do need this for plotting
        x_wave, x_flux, x_fluxerr = model._extract_from_indices(wave, flux, ZE, df=fluxerr)
        balmerwaveindex[x] = W0, ZE
        line_wave    = np.hstack((line_wave, x_wave))
        line_flux    = np.hstack((line_flux, x_flux))
        line_fluxerr = np.hstack((line_fluxerr, x_fluxerr))
        line_number  = np.hstack((line_number, np.repeat(x, len(x_wave))))
        line_ind     = np.hstack((line_ind, ZE[0]))
    # continuum data is just the spectrum with the Balmer lines removed
    continuumdata  = (np.delete(wave, line_ind), np.delete(flux, line_ind), np.delete(fluxerr, line_ind))
    linedata = (line_wave, line_flux, line_fluxerr, line_number, line_ind)
    names=str('wave,flux,flux_err,line_mask,line_ind')
    linedata = np.rec.fromarrays(linedata, names=names)
    names=str('wave,flux,flux_err')
    continuumdata = np.rec.fromarrays(continuumdata, names=names)
    return linedata, continuumdata


[docs]def blotch_spectrum(spec, linedata):
    """
    Automagically remove cosmic rays and gaps from spectrum

    Parameters
    ----------
    spec : :py:class:`numpy.recarray`
        The spectrum with ``dtype=[('wave', '<f8'), ('flux', '<f8'), ('flux_err', '<f8')]``
    linedata : :py:class:`numpy.recarray`
        The observations of the spectrum corresponding to the hydrogen Balmer
        lines. 
        Must have 
        ``dtype=[('wave', '<f8'), ('flux', '<f8'), ('flux_err', '<f8'), ('line_mask', 'i4'), ('line_ind','i4')]``
        Produced by :py:func:`orig_cut_lines`

    Returns
    -------
    spec : :py:class:`numpy.recarray`
        The blotched spectrum with ``dtype=[('wave', '<f8'), ('flux', '<f8'), ('flux_err', '<f8')]``

    Notes
    -----
        Some spectra have nasty cosmic rays or gaps in the data. This routine
        does a reasonable job blotching these by Wiener filtering the spectrum,
        marking features that differ significantly from the local variance in
        the region, and replace them with the filtered values. The hydrogen
        Balmer lines are preserved, so if your gap/cosmic ray lands on a line
        it will not be filtered. Additionally, filtering has edge effects, and
        these data are preserved as well. If you do blotch the spectrum, it is
        highly recommended that you use the bluelimit and redlimit options to
        trim the ends of the spectrum. Note that the spectrum will be rejected
        if it has flux or flux errors that are not finite or below zero. This
        is often the case with cosmic rays and gaps, so you will likely
        have to do some manual removal of these points.  
            
        YOU SHOULD PROBABLY PRE-PROCESS YOUR DATA YOURSELF BEFORE FITTING IT
        AND NOT BE LAZY! THIS ROUTINE ONLY EXISTS TO FIT QUICK LOOK SPECTRUM AT
        THE TELESCOPE, BEFORE FINAL REDUCTIONS!
    """

    message = 'You have requested the spectrum be blotched. You should probably do this by hand. Caveat emptor.'
    warnings.warn(message, UserWarning)

    window = 151
    blueend = spec.flux[0:window]
    redend  = spec.flux[-window:]

    # wiener filter the spectrum
    med_filt = scisig.wiener(spec.flux, mysize=window)
    diff = np.abs(spec.flux - med_filt)

    # calculate the running variance with the same window
    sigma = scisig.medfilt(diff, kernel_size=window)

    # the sigma is really a median absolute deviation
    scaling = norm.ppf(3/4.)
    sigma/=scaling

    mask = (diff > 5.*sigma)

    # clip the bad outliers from the spectrum
    spec.flux[mask] = med_filt[mask]

    # restore the original lines, so that they aren't clipped
    saveind = linedata.line_ind
    spec.flux[saveind] = linedata.flux
    spec.flux[0:window] = blueend
    spec.flux[-window:] = redend
    return spec


[docs]def rebin_spec_by_int_factor(spec, f=1):
    """
    Rebins a spectrum by an integer factor f

    Parameters
    ----------
    spec : :py:class:`numpy.recarray`
        The spectrum with ``dtype=[('wave', '<f8'), ('flux', '<f8'), ('flux_err', '<f8')]``
    f : int, optional
        an integer factor to rebin the spectrum by. Default is ``1`` (no rebinning)

    Returns
    -------
    rspec : :py:class:`numpy.recarray`
        The rebinned spectrum with ``dtype=[('wave', '<f8'), ('flux', '<f8'), ('flux_err', '<f8')]``

    Notes
    -----
        If the spectrum is not divisible by f, the edges are trimmed by discarding
        the remainder measurements from both ends. If the remainder itself is odd, the
        extra measurement is discarded from the blue side.
    """

    f    = int(f)
    if f <= 1:
        return spec
    nwave     = len(spec.wave)
    rnwave    = nwave//f
    # if the spectrum is not perfectly divisible by the f, cut the remainder out
    # divide it evenly between blue and red
    remainder = nwave%f
    ncut      = remainder//2
    icut_blue = ncut + (remainder%2)
    icut_red  = nwave - ncut
    ispec     = spec[icut_blue:icut_red]
    rf        = [(np.mean(ispec.wave[f*x:f*x+f]),\
                  np.average(ispec.flux[f*x:f*x+f],\
                             weights=1./ispec.flux_err[f*x:f*x+f]**2.,\
                             returned=True))\
                 for x in range(rnwave)]
    rwave, rf = list(zip(*rf))
    rflux, rw = list(zip(*rf))
    rw = np.array(rw)
    rflux_err = 1./(rw**0.5)
    rwave = np.array(rwave)
    rflux  = np.array(rflux)
    names=str('wave,flux,flux_err')
    rspec = np.rec.fromarrays((rwave, rflux, rflux_err),names=names)
    return rspec


[docs]def pre_process_spectrum(spec, bluelimit, redlimit, model, params,\
        lamshift=0., vel=0., rebin=1, blotch=False, rescale=False):
    """
    Pre-process the spectrum before fitting

    Parameters
    ----------
    spec : :py:class:`numpy.recarray`
        The spectrum with ``dtype=[('wave', '<f8'), ('flux', '<f8'), ('flux_err', '<f8')]``
    bluelimit : None or float
        Trim wavelengths bluer than this limit. Uses the bluest wavelength of spectrum if ``None``
    redlimit : None or float
        Trim wavelengths redder than this limit. Uses the reddest wavelength of spectrum if ``None``
    model : :py:class:`WDmodel.WDmodel.WDmodel` instance
        The DA White Dwarf SED model generator
    params : dict
        A parameter dict such as that produced by
        :py:func:`WDmodel.io.read_params`
        Will be modified to adjust the spectrum normalization parameters ``dl``
        limits if ``rescale`` is set
    lamshift : float, optional
        Apply a flat wavelength shift to the spectrum. Useful if the target was
        not properly centered in the slit, and the shift is not correlated with
        wavelength. Default is ``0``.
    vel : float, optional
        Apply a velocity shift to the spectrum. Default is ``0``.
    rebin : int, optional 
        Integer factor by which to rebin the spectrum.
        Default is ``1`` (no rebinning).
    blotch : bool, optional
        Attempt to remove cosmic rays and gaps from spectrum. Only to be used
        for quick look analysis at the telescope.
    rescale : bool, optional
        Rescale the spectrum to make the median noise ``~1``. Has no effect on
        fitted parameters except spectrum flux normalization parameter ``dl``
        but makes residual plots, histograms more easily interpretable as they
        can be compared to an ``N(0, 1)`` distribution.

    Returns
    -------
    spec : :py:class:`numpy.recarray`
        The spectrum with ``dtype=[('wave', '<f8'), ('flux', '<f8'), ('flux_err', '<f8')]``


    See Also
    --------
    :py:func:`orig_cut_lines`
    :py:func:`blotch_spectrum`
    :py:func:`rebin_spec_by_int_factor`
    :py:func:`polyfit_continuum`
    """

    # Test that the array is monotonic
    model._wave_test(spec.wave)

    out_params = io.copy_params(params)

    # offset and blueshift/redshift the spectrum
    if lamshift != 0.:
        spec.wave += lamshift
    if vel != 0.:
        spec.wave *= (1. +(vel*1000./_C.value))

    # clip the spectrum to whatever range is requested
    if bluelimit is None:
        bluelimit = spec.wave.min()

    if bluelimit > 0:
        bluelimit = float(bluelimit)
    else:
        bluelimit = spec.wave.min()

    if redlimit is None:
        redlimit = spec.wave.max()

    if redlimit > 0:
        redlimit = float(redlimit)
    else:
        redlimit = spec.wave.max()

    if rescale:
        _, scalemask = model._get_indices_in_range(spec.wave, bluelimit, redlimit)
        scale_factor = np.median(spec.flux_err[scalemask])
        if scale_factor == 0:
            message = 'Uhhhh. This spectrum has incredibly weird errors with a median of 0'
            raise RuntimeError(message)
        message = "Scaling the spectrum by {:.10g}".format(scale_factor)
        print(message)
        spec.flux /= scale_factor
        spec.flux_err /= scale_factor
        if out_params['dl']['value'] is not None:
            out_params['dl']['value'] *= (scale_factor)**0.5
        lb, ub = out_params['dl']['bounds']
        out_params['dl']['bounds'] = (lb*(scale_factor**0.5), ub*(scale_factor**0.5))
        out_params['dl']['scale'] *= (scale_factor)**0.5
    else:
        scale_factor = 1.

    # if requested, rebin the spectrum
    if rebin != 1:
        spec = rebin_spec_by_int_factor(spec, f=rebin)

    # get a coarse mask of line and continuum
    linedata, continuumdata  = orig_cut_lines(spec, model)
    if blotch:
        spec = blotch_spectrum(spec, linedata)

    # get a coarse estimate of the full continuum
    # this isn't used for anything other than cosmetics
    cont_model = polyfit_continuum(continuumdata, spec.wave)

    # trim the outputs to the requested length
    _, usemask = model._get_indices_in_range(spec.wave, bluelimit, redlimit)
    spec = spec[usemask]
    cont_model = cont_model[usemask]

    _, usemask = model._get_indices_in_range(linedata.wave, bluelimit, redlimit)
    linedata = linedata[usemask]

    _, usemask = model._get_indices_in_range(continuumdata.wave, bluelimit, redlimit)
    continuumdata = continuumdata[usemask]
    return spec, cont_model, linedata, continuumdata, scale_factor, out_params


[docs]def quick_fit_spec_model(spec, model, params):
    """
    Does a quick fit of the spectrum to get an initial guess of the fit parameters

    Uses iminuit to do a rough diagonal fit - i.e. ignores covariance.
    For simplicity, also fixed FWHM and Rv (even when set to be fit).
    Therefore, only teff, logg, av, dl are fit for (at most).
    This isn't robust, but it's good enough for an initial guess.

    Parameters
    ----------
    spec : :py:class:`numpy.recarray`
        The spectrum with ``dtype=[('wave', '<f8'), ('flux', '<f8'), ('flux_err', '<f8')]``
    model : :py:class:`WDmodel.WDmodel.WDmodel` instance
        The DA White Dwarf SED model generator
    params : dict
        A parameter dict such as that produced by
        :py:func:`WDmodel.io.read_params`

    Returns
    -------
    migrad_params : dict
        The output parameter dictionary with updated initial guesses stored in
        the ``value`` key. Same format as ``params``.

    Raises
    ------
    RuntimeError
        If all of ``teff, logg, av, dl`` are set as fixed - there's nothing to fit.
    RuntimeWarning
        If :py:func:`minuit.Minuit.migrad` or :py:func:`minuit.Minuit.hesse` indicate that the fit is unreliable

    Notes
    -----
        None of the starting values for the parameters maybe ``None`` EXCEPT ``c``.
        This refines the starting guesses, and determines a reasonable value for ``c``
    """

    teff0 = params['teff']['value']
    logg0 = params['logg']['value']
    av0   = params['av']['value']
    dl0   = params['dl']['value']

    # we don't actually fit for these values
    rv   = params['rv']['value']
    fwhm = params['fwhm']['value']

    fix_teff = params['teff']['fixed']
    fix_logg = params['logg']['fixed']
    fix_av   = params['av']['fixed']
    fix_dl   = params['dl']['fixed']

    if all((fix_teff, fix_logg, fix_av, fix_dl)):
        message = "All of teff, logg, av, dl are marked as fixed - nothing to fit."
        raise RuntimeError(message)

    pixel_scale = 1./np.median(np.gradient(spec.wave))

    if dl0 is None:
        # only dl and fwhm are allowed to have None as input values
        # fwhm will get set to a default fwhm if it's None
        mod = model._get_obs_model(teff0, logg0, av0, fwhm, spec.wave, rv=rv, pixel_scale=pixel_scale)
        c0   = spec.flux.mean()/mod.mean()
        dl0 = (1./(4.*np.pi*c0))**0.5

    teff_scale = params['teff']['scale']
    logg_scale = params['logg']['scale']
    av_scale   = params['av']['scale']
    dl_scale   = params['dl']['scale']

    teff_bounds = params['teff']['bounds']
    logg_bounds = params['logg']['bounds']
    av_bounds   = params['av']['bounds']
    dl_bounds   = params['dl']['bounds']

    # ignore the covariance and define a simple chi2 to minimize
    def chi2(teff, logg, av, dl):
        mod = model._get_obs_model(teff, logg, av, fwhm, spec.wave, rv=rv, pixel_scale=pixel_scale)
        mod *= (1./(4.*np.pi*(dl)**2.))
        chi2 = np.sum(((spec.flux-mod)/spec.flux_err)**2.)
        return chi2

    # use minuit to refine our starting guess
    m = Minuit(chi2, teff=teff0, logg=logg0, av=av0, dl=dl0,\
                fix_teff=fix_teff, fix_logg=fix_logg, fix_av=fix_av, fix_dl=fix_dl,\
                error_teff=teff_scale, error_logg=logg_scale, error_av=av_scale, error_dl=dl_scale,\
                limit_teff=teff_bounds, limit_logg=logg_bounds, limit_av=av_bounds, limit_dl=dl_bounds,\
                print_level=1, pedantic=True, errordef=1)

    outfnmin, outpar = m.migrad()

    result = m.values
    errors = m.errors
    # duplicate the input dictionary and update
    migrad_params = io.copy_params(params)
    for param in result:
        migrad_params[param]['value'] = result[param]
        migrad_params[param]['scale'] = errors[param]

    if outfnmin['is_valid']:
        # if minuit works, then try computing the Hessian
        try:
            m.hesse()
            # if we sucessfully computed the Hessian, then update the parameters
            #
            result = m.values
            errors = m.errors
            for param in result:
                migrad_params[param]['value'] = result[param]
                migrad_params[param]['scale'] = errors[param]
        except RuntimeError:
            message = "Something seems to have gone wrong with Hessian matrix computation. You should probably stop."
            warnings.warn(message, RuntimeWarning)
    else:
        # there's some objects for which minuit will fail - just return the original params then
        migrad_params = io.copy_params(params)

        # if the minuit fill failed, and we didn't get an initial dl guess,
        # just use the failed result value as a start this has the dubious
        # value of being better than None
        if migrad_params['dl']['value'] is None:
            migrad_params['dl']['value'] = result['dl']

        message = "Something seems to have gone wrong refining parameters with migrad. You should probably stop."
        warnings.warn(message, RuntimeWarning)
    return migrad_params


[docs]def fix_pos(pos, free_param_names, params):
    """
    Ensures that the initial positions of the :py:mod:`emcee` walkers are out of bounds

    Parameters
    ----------
    pos : array-like
        starting positions of all the walkers, such as that produced by 
        :py:func:`emcee:utils.sample_ball`
    free_param_names : iterable
        names of parameters that are free to float. Names must correspond to keys in ``params``.
    params : dict
        A parameter dict such as that produced by
        :py:func:`WDmodel.io.read_params`

    Returns
    -------
    pos : array-like
        starting positions of all the walkers, fixed to guarantee that they are
        within ``bounds`` defined in ``params``

    Notes
    -----
        :py:func:`emcee.utils.sample_ball` creates random walkers that may be
        initialized out of bounds.  These walkers get stuck as there is no step
        they can take that will make the change in loglikelihood finite.  This
        makes the chain appear strongly correlated since all the samples of one
        walker are at a fixed location. This resolves the issue by assuming
        that the parameter ``value``  was within ``bounds`` to begin with. This
        routine does not do any checking of types, values or bounds. This check
        is done by :py:func:`WDmodel.io.get_params_from_argparse` before the
        fit. If you setup the fit using an external code, you should check
        these values.

    See Also
    --------
    :py:func:`emcee.utils.sample_ball`
    :py:func:`WDmodel.io.get_params_from_argparse`
    """

    for i, name in enumerate(free_param_names):
        lb, ub = params[name]['bounds']
        p0     = params[name]['value']
        std    = params[name]['scale']
        # take a 5 sigma range
        lr, ur = (p0-5.*std, p0+5.*std)
        ll = max(lb, lr, 0.)
        ul = min(ub, ur)
        ind = np.where((pos[:,i] <= ll) | (pos[:,i] >= ul))
        nreplace = len(pos[:,i][ind])
        pos[:,i][ind] = np.random.rand(nreplace)*(ul - ll) + ll
    return pos


[docs]def hyper_param_guess(spec, phot, model, pbs, params):
    """
    Makes a guess for the parameter ``mu`` after the initial fit by
    :py:func:`quick_fit_spec_model`

    Parameters
    ----------
    spec : :py:class:`numpy.recarray`
        The spectrum with ``dtype=[('wave', '<f8'), ('flux', '<f8'), ('flux_err', '<f8')]``
    phot : :py:class:`numpy.recarray`
        The photometry of ``objname`` with ``dtype=[('pb', 'str'), ('mag', '<f8'), ('mag_err', '<f8')]``
    model : :py:class:`WDmodel.WDmodel.WDmodel` instance
        The DA White Dwarf SED model generator
    pbs : dict
        Passband dictionary containing the passbands corresponding to
        phot.pb` and generated by :py:func:`WDmodel.passband.get_pbmodel`.
    params : dict
        A parameter dict such as that produced by
        :py:func:`WDmodel.io.read_params`

    Returns
    -------
    out_params : dict
        The output parameter dictionary with an initial guess for ``mu`` 

    Notes
    -----
        Uses the initial guess of parameters from the spectrum fit by
        :py:func:`quick_fit_spec_model` to construct an initial guess of the
        SED, and computes ``mu`` (which looks like a distance modulus, but also
        includes a normalization for the radius of the DA white dwarf, and it's
        radius) as the median difference between the observed and synthetic
        photometry.
    """
    out_params = io.copy_params(params)
    # mu has a user supplied guess - do nothing
    if params['mu']['value'] is not None:
        return out_params

    # restore the minuit best fit model
    teff = params['teff']['value']
    logg = params['logg']['value']
    av   = params['av']['value']
    rv   = params['rv']['value']
    fwhm = params['fwhm']['value']
    pixel_scale = 1./np.median(np.gradient(spec.wave))
    _, model_spec = model._get_full_obs_model(teff, logg, av, fwhm, spec.wave,\
            rv=rv, pixel_scale=pixel_scale)

    # update the mu guess if we don't have one, or the parameter isn't fixed
    if params['mu']['value'] is None or (not params['mu']['fixed']):
        # get the median between observed and model mags for the minuit model
        if phot is not None:
            model_mags = passband.get_model_synmags(model_spec, pbs)
            mu0_guess  = np.median(phot.mag - model_mags.mag)
        else:
            mu0_guess = 0.
        out_params['mu']['value'] = mu0_guess

    return out_params


[docs]def fit_model(spec, phot, model, covmodel, pbs, params,\
            objname, outdir, specfile,\
            phot_dispersion=0.,\
            samptype='ensemble', ascale=2.0,\
            ntemps=1, nwalkers=300, nburnin=50, nprod=1000, everyn=1, thin=1, pool=None,\
            resume=False, redo=False):
    """
    Core routine that models the spectrum using the white dwarf model and a
    Gaussian process with a stationary kernel to account for any flux
    miscalibration, sampling the posterior using a MCMC.

    Parameters
    ----------
    spec : :py:class:`numpy.recarray`
        The spectrum with ``dtype=[('wave', '<f8'), ('flux', '<f8'), ('flux_err', '<f8')]``
    phot : :py:class:`numpy.recarray`
        The photometry of ``objname`` with ``dtype=[('pb', 'str'), ('mag', '<f8'), ('mag_err', '<f8')]``
    model : :py:class:`WDmodel.WDmodel.WDmodel` instance
        The DA White Dwarf SED model generator
    pbs : dict
        Passband dictionary containing the passbands corresponding to
        phot.pb` and generated by :py:func:`WDmodel.passband.get_pbmodel`.
    params : dict
        A parameter dict such as that produced by
        :py:func:`WDmodel.io.read_params`
    objname : str
        object name - used to save output with correct name
    outdir : str
        controls where the chain file s written
    specfile : str
        Used in the title, and to set the name of the ``outfile``
    phot_dispersion : float, optional
        Excess photometric dispersion to add in quadrature with the
        photometric uncertainties ``phot.mag_err``. Use if the errors are
        grossly underestimated. Default is ``0.``
    samptype : ``{'ensemble', 'pt', 'gibbs'}``
        Which sampler to use. The default is ``ensemble``.
    ascale : float
        The proposal scale for the sampler. Default is ``2.``
    ntemps : int
        The number of temperatures to run walkers at. Only used if ``samptype``
        is in ``{'pt','gibbs'}`` and set to ``1.`` for ``ensemble``. See a
        short summary `review
        <https://en.wikipedia.org/wiki/Parallel_tempering>`_ for details.
        Default is ``1.``
    nwalkers : int
        The number of `Goodman and Weare walkers
        <http://msp.org/camcos/2010/5-1/p04.xhtml>`_. Default is ``300``.
    nburnin : int
        The number of steps to discard as burn-in for the Markov-Chain. Default is ``500``.
    nprod : int
        The number of production steps in the Markov-Chain. Default is ``1000``.
    everyn : int, optional
        If the posterior function is evaluated using only every nth
        observation from the data, this should be specified. Default is ``1``.
    thin : int
        Only save every ``thin`` steps to the output Markov Chain. Useful, if
        brute force way of reducing correlation between samples.
    pool : None or :py:class`emcee.utils.MPIPool`
        If running with MPI, the pool object is used to distribute the
        computations among the child process
    resume : bool
        If ``True``, restores state and resumes the chain for another ``nprod`` iterations.
    redo : bool
        If ``True``, and a chain file and state file exist, simply clobbers them.

    Returns
    -------
    free_param_names : list
        names of parameters that were fit for. Names correspond to keys in
        ``params`` and the order of parameters in ``samples``.
    samples : array-like
        The flattened Markov Chain with the parameter positions.
        Shape is ``(ntemps*nwalkers*nprod, nparam)``
    samples_lnprob : array-like
        The flattened log of the posterior corresponding to the positions in
        ``samples``. Shape is ``(ntemps*nwalkers*nprod, 1)``
    everyn :  int 
        Specifies sampling of the data used to compute the posterior. Provided
        in case we are using ``resume`` to continue the chain, and this value
        must be restored from the state file, rather than being supplied as a
        user input.
    shape : tuple
        Specifies the shape of the un-flattened chain.
        ``(ntemps, nwalkers, nprod, nparam)``
        Provided in case we are using ``resume`` to continue the chain, and
        this value must be restored from the state file, rather than being
        supplied as a user input.

    Raises
    ------
    RuntimeError
        If ``resume`` is set without the chain having been run in the first
        place.

    Notes
    -----
        Uses an Ensemble MCMC (implemented by emcee) to generate samples from
        the posterior. Does a short burn-in around the initial guess model
        parameters - either :py:mod:`minuit` or user supplied values/defaults.
        Model parameters may be frozen/fixed. Parameters can have bounds
        limiting their range. Then runs a full production change. Chain state
        is saved after every 100 production steps, and may be continued after
        the first 100 steps if interrupted or found to be too short. Progress
        is indicated visually with a progress bar that is written to STDOUT.

    See Also
    --------
    :py:mod:`WDmodel.likelihood`
    :py:mod:`WDmodel.covariance`
    """

    outfile = io.get_outfile(outdir, specfile, '_mcmc.hdf5', check=True, redo=redo, resume=resume)
    if not resume:
        # create a HDF5 file to hold the chain data
        outf = h5py.File(outfile, 'w')
    else:
        # restore some attributes from the HDF5 file to make sure the state is consistent
        try:
            outf = h5py.File(outfile, 'a')
            chain = outf['chain']
        except (IOError, OSError, KeyError) as e:
            message = '{}\nMust run fit to generate mcmc chain before attempting to resume'
            raise RuntimeError(message)

        ntemps      = chain.attrs["ntemps"]
        nwalkers    = chain.attrs["nwalkers"]
        everyn      = chain.attrs["everyn"]
        thin        = chain.attrs["thin"]
        samptype    = chain.attrs["samptype"]
        ascale      = chain.attrs["ascale"]

    # create a state file to periodically save the state of the chain
    statefile = io.get_outfile(outdir, specfile, '_state.pkl', redo=redo)

    # setup the likelihood function
    lnlike = likelihood.setup_likelihood(params)
    nparam   = lnlike.vector_size

    # get the starting position and the scales for each parameter
    init_p0  = lnlike.get_parameter_dict()
    p0       = list(init_p0.values())
    free_param_names = list(init_p0.keys())
    std = [params[x]['scale'] for x in free_param_names]

    # create a sample ball
    pos = emcee.utils.sample_ball(p0, std, size=ntemps*nwalkers)
    pos = fix_pos(pos, free_param_names, params)
    if samptype != 'ensemble':
        pos = pos.reshape(ntemps, nwalkers, nparam)

    if everyn != 1:
        inspec = spec[::everyn]
    else:
        inspec = spec

    # even if we only take every nth sample, the pixel scale is the same
    pixel_scale = 1./np.median(np.gradient(spec.wave))

    # configure the posterior function
    lnpost = likelihood.WDmodel_Posterior(inspec, phot, model, covmodel, pbs, lnlike,\
            pixel_scale=pixel_scale, phot_dispersion=phot_dispersion)

    # setup the sampler
    if samptype == 'ensemble':
        sampler = emcee.EnsembleSampler(nwalkers, nparam, lnpost,\
                a=ascale,  pool=pool)
        ntemps = 1
    else:
        logpkwargs = {'prior':True}
        loglkwargs = {'likelihood':True}
        sampler = mossampler.MOSSampler(ntemps, nwalkers, nparam, lnpost, lnpost,\
                a=ascale, pool=pool, logpkwargs=logpkwargs, loglkwargs=loglkwargs)

    # use James Guillochon's MOSSampler "gibbs"(-ish) implementation
    gibbs = False
    if samptype == 'gibbs':
        gibbs = True
    sampler_kwargs = {}
    if samptype != 'ensemble':
        inpos = pos.reshape(ntemps*nwalkers, nparam)
        if pool is None:
            lnprob0 = list(map(lnpost, inpos))
        else:
            lnprob0 = pool.map(lnpost, inpos)

        lnprob0 = np.array(lnprob0)
        lnprob0 = lnprob0.reshape(ntemps, nwalkers)
        sampler_kwargs = {'gibbs':gibbs,'lnprob0':lnprob0}
    sampler_kwargs['thin'] = thin

    # do a short burn-in
    if not resume:
        with progress.Bar(label="Burn-in", expected_size=nburnin, hide=False) as bar:
            bar.show(0)
            j = 0
            for i, result in enumerate(sampler.sample(pos, iterations=thin*nburnin, **sampler_kwargs)):
                if (i+1)%thin == 0:
                    bar.show(j+1)
                    j+=1

        # find the MAP position after the burnin
        samples        = sampler.flatchain
        samples_lnprob = sampler.lnprobability
        map_samples        = samples.reshape(ntemps, nwalkers, nburnin, nparam)
        map_samples_lnprob = samples_lnprob.reshape(ntemps, nwalkers, nburnin)
        max_ind        = np.argmax(map_samples_lnprob)
        max_ind        = np.unravel_index(max_ind, (ntemps, nwalkers, nburnin))
        max_ind        = tuple(max_ind)
        p1        = map_samples[max_ind]

        # reset the sampler
        sampler.reset()

        lnlike.set_parameter_vector(p1)
        message = "\nMAP Parameters after Burn-in"
        print(message)
        for k, v in lnlike.get_parameter_dict().items():
            message = "{} = {:f}".format(k,v)
            print(message)

        # adjust the walkers to start around the MAP position from burnin
        pos = emcee.utils.sample_ball(p1, std, size=ntemps*nwalkers)
        pos = fix_pos(pos, free_param_names, params)
        if samptype != 'ensemble':
            pos = pos.reshape(ntemps, nwalkers, nparam)

        # setup incremental chain saving
        chain = outf.create_group("chain")

        # last saved position of the chain
        laststep = 0

        # save some other attributes about the chain
        chain.attrs["nwalkers"] = nwalkers
        chain.attrs["nparam"]   = nparam
        chain.attrs["everyn"]   = everyn
        chain.attrs["ascale"]   = ascale
        chain.attrs["samptype"] = samptype
        chain.attrs["ntemps"]   = ntemps
        chain.attrs["thin"]     = thin
        chain.attrs["nprod"]    = nprod
        chain.attrs["laststep"] = laststep

        # save the parameter names corresponding to the chain
        free_param_names = np.array([str(x) for x in free_param_names])
        dt = free_param_names.dtype.str.lstrip('|').replace('U','S')
        chain.create_dataset("names",data=free_param_names.astype(np.string_), dtype=dt)

        # save the parameter configuration as well
        # this is redundant, since it is saved in the JSON file, but having it one place is nice
        names = lnlike.get_parameter_names(include_frozen=True)
        names = np.array([str(x) for x in names])
        dt = names.dtype.str.lstrip('|').replace('U','S')
        par_grp = outf.create_group("params")
        par_grp.create_dataset("names",data=names.astype(np.string_), dtype=dt)
        for param in params:
            this_par = par_grp.create_group(param)
            this_par.attrs["value"]  = params[param]["value"]
            this_par.attrs["fixed"]  = params[param]["fixed"]
            this_par.attrs["scale"]  = params[param]["scale"]
            this_par.attrs["bounds"] = params[param]["bounds"]

        # production
        dset_chain  = chain.create_dataset("position",(ntemps*nwalkers*nprod,nparam),maxshape=(None,nparam))
        dset_lnprob = chain.create_dataset("lnprob",(ntemps*nwalkers*nprod,),maxshape=(None,))
    else:
        # if we are resuming, we only need to make sure we can write the
        # resumed chain to the chain file (i.e. the arrays are properly
        # resized)
        laststep    = chain.attrs["laststep"]
        dset_chain  = chain["position"]
        dset_lnprob = chain["lnprob"]
        dset_chain.resize((ntemps*nwalkers*(laststep+nprod),nparam))
        dset_lnprob.resize((ntemps*nwalkers*(laststep+nprod),))
        chain.attrs["nprod"] = laststep+nprod

        # and that we have the state of the chain when we ended
        try:
            with open(statefile, 'rb') as f:
                pickle_kwargs = {}
                if sys.version_info[0] > 2:
                    pickle_kwargs['encoding'] = 'latin-1'
                position, lnpost, rstate = pickle.load(f, **pickle_kwargs)
        except (IOError, OSError) as e:
            message = '{}\nMust run fit to generate mcmc chain state pickle before attempting to resume'.format(e)
            raise RuntimeError(message)

        if samptype in ('pt', 'gibbs'):
            # PTsampler doesn't include rstate0 in the release version of emcee
            # this is apparently fixed on git, but not in release yet it is
            # included in run_mcmc which simply sets the sampler random_state
            # attribute - do the same thing here
            sampler.random_state = rstate
        else:
            sampler_kwargs['rstate0']=rstate
        sampler_kwargs['lnprob0']=lnpost
        pos = position

        message = "Resuming from iteration {} for {} steps".format(laststep, nprod)
        print(message)

    # write to disk before we start
    outf.flush()

    # since we're going to save the chain in HDF5, we don't need to save it in memory elsewhere
    sampler_kwargs['storechain']=False

    # run the production chain
    with progress.Bar(label="Production", expected_size=laststep+nprod, hide=False) as bar:
        bar.show(laststep)
        j = laststep
        for i, result in enumerate(sampler.sample(pos, iterations=thin*nprod, **sampler_kwargs)):
            if (i+1)%thin != 0:
                continue
            position = result[0]
            lnpost   = result[1]
            position = position.reshape((-1, nparam))
            lnpost   = lnpost.reshape(ntemps*nwalkers)
            dset_chain[ntemps*nwalkers*j:ntemps*nwalkers*(j+1),:] = position
            dset_lnprob[ntemps*nwalkers*j:ntemps*nwalkers*(j+1)] = lnpost

            # save state every 100 steps
            if (j+1)%100 == 0:
                # make sure we know how many steps we've taken so that we can resize arrays appropriately
                chain.attrs["laststep"] = j+1
                outf.flush()

                # save the state of the chain
                with open(statefile, 'wb') as f:
                    pickle.dump(result, f, 2)

            bar.show(j+1)
            j+=1

        # save the final state of the chain and nprod, laststep
        chain.attrs["nprod"]    = laststep+nprod
        chain.attrs["laststep"] = laststep+nprod
        with open(statefile, 'wb') as f:
            pickle.dump(result, f, 2)

    # save the acceptance fraction
    if resume:
        if "afrac" in list(chain.keys()):
            del chain["afrac"]
        if samptype != 'ensemble':
            if "tswap_afrac" in list(chain.keys()):
                del chain["tswap_afrac"]
    chain.create_dataset("afrac", data=sampler.acceptance_fraction)
    if samptype != 'ensemble' and ntemps > 1:
        chain.create_dataset("tswap_afrac", data=sampler.tswap_acceptance_fraction)

    samples         = np.array(dset_chain)
    samples_lnprob  = np.array(dset_lnprob)

    # finalize the chain file, close it and close the pool
    outf.flush()
    outf.close()
    if pool is not None:
        pool.close()

    # find the MAP value after production
    map_samples = samples.reshape(ntemps, nwalkers, laststep+nprod, nparam)
    map_samples_lnprob = samples_lnprob.reshape(ntemps, nwalkers, laststep+nprod)
    max_ind = np.argmax(map_samples_lnprob)
    max_ind = np.unravel_index(max_ind, (ntemps, nwalkers, laststep+nprod))
    max_ind = tuple(max_ind)
    p_final = map_samples[max_ind]
    lnlike.set_parameter_vector(p_final)
    message = "\nMAP Parameters after Production"
    print(message)

    for k, v in lnlike.get_parameter_dict().items():
        message = "{} = {:f}".format(k,v)
        print(message)
    message = "Mean acceptance fraction: {0:.3f}".format(np.mean(sampler.acceptance_fraction))
    print(message)

    # return the parameter names of the chain, the positions, posterior, and the shape of the chain
    return  free_param_names, samples, samples_lnprob, everyn, (ntemps, nwalkers, laststep+nprod, nparam)


[docs]def get_fit_params_from_samples(param_names, samples, samples_lnprob, params,\
        ntemps=1, nwalkers=300, nprod=1000, discard=5):
    """
    Get the marginalized parameters from the sample chain

    Parameters
    ----------
    param_names : list
        names of parameters that were fit for. Names correspond to keys in
        ``params`` and the order of parameters in ``samples``.
    samples : array-like
        The flattened Markov Chain with the parameter positions.
        Shape is ``(ntemps*nwalkers*nprod, nparam)``
    samples_lnprob : array-like
        The flattened log of the posterior corresponding to the positions in
        ``samples``. Shape is ``(ntemps*nwalkers*nprod, 1)``
    params : dict
        A parameter dict such as that produced by
        :py:func:`WDmodel.io.read_params`
    ntemps : int
        The number of temperatures chains were run at. Default is ``1.``
    nwalkers : int
        The number of `Goodman and Weare walkers
        <http://msp.org/camcos/2010/5-1/p04.xhtml>`_ used in the fit. Default
        is ``300``.
    nprod : int
        The number of production steps in the Markov-Chain. Default is ``1000``.
    discard : int
        percentage of nprod steps from the start of the chain to discard in
        analyzing samples

    Returns
    -------
    mcmc_params : dict
        The output parameter dictionary with updated parameter estimates,
        errors and a scale. 
        ``params``.
    out_samples : array-like
        The flattened Markov Chain with the parameter positions with the first
        ``%discard`` tossed.
    out_ samples_lnprob : array-like
        The flattened log of the posterior corresponding to the positions in
        ``samples`` with the first ``%discard`` samples tossed.

    See Also
    --------
    :py:func:`fit_model`
    """

    ndim = len(param_names)

    in_samp   = samples.reshape(ntemps, nwalkers, nprod, ndim)
    in_lnprob = samples_lnprob.reshape(ntemps, nwalkers, nprod)

    # discard the first %discard steps from all the walkers
    nstart    = int(np.ceil((discard/100.)*nprod))
    in_samp   = in_samp[:,:,nstart:,:]
    in_lnprob = in_lnprob[:,:,nstart:]

    # reflatten
    in_samp = in_samp.reshape((-1, ndim))
    in_lnprob = in_lnprob.reshape(in_samp.shape[0])

    # only select entries with finite log posterior
    # if this isn't all, something is wrong
    mask = np.isfinite(in_lnprob)

    # update the parameter dict
    for i, param in enumerate(param_names):
        x = in_samp[mask,i]
        q_16, q_50, q_84 = np.percentile(x, [16., 50., 84.])
        params[param]['value']  = q_50
        params[param]['bounds'] = (q_16, q_84)
        params[param]['errors_pm'] = (q_84 - q_50, q_50 - q_16)
        params[param]['scale']  = float(np.std(x))

    # make sure the output for fixed parameters is fixed
    fixed_params = set(params.keys()) - set(param_names)
    for param in fixed_params:
        if params[param]['fixed']:
            params[param]['scale'] = 0.
            params[param]['errors_pm'] = (0., 0.)
        else:
            # this should never happen, unless the state of the files was changed
            message = "Huh.... {} not marked as fixed but was not fit for...".format(param)
            print(message)
    return params, in_samp[mask,:], in_lnprob[mask]