Source code for mbproj2.mcmc

# -*- coding: utf-8 -*-
# Copyright (C) 2016 Jeremy Sanders <jeremy@jeremysanders.net>
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Library General Public
# License as published by the Free Software Foundation; either
# version 2 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# Library General Public License for more details.
#
# You should have received a copy of the GNU Library General Public
# License along with this library; if not, write to the Free
# Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
# MA 02111-1307, USA

"""For doing MCMC given Fit object."""

from __future__ import division, print_function, absolute_import

import h5py
import emcee
import numpy as N

from . import utils
from .utils import uprint
from . import forkparallel

class _MultiProcessPool:
    """Internal object to use ForkQueue to evaluate multiple profiles
    simultaneously."""

    def __init__(self, func, processes):
        self.queue = forkparallel.ForkQueue(func, processes)

    def map(self, func, args):
        """Note func is ignored here."""
        return self.queue.execute(args)

[docs]class MCMC:
    """For running Markov Chain Monte Carlo."""

    def __init__(self, fit,
                 walkers=100, processes=1, initspread=0.01):
        """
        :param Fit fit: Fit object to use for mcmc
        :param int walkers: number of emcee walkers to use
        :param int processes: number of simultaneous processes to compute likelihoods
        :param float initspread: random Gaussian width added to create initial parameters
        """

        self.fit = fit
        self.walkers = walkers
        self.numpars = len(fit.thawed)
        self.initspread = initspread

        # function for getting likelihood
        likefunc = lambda pars: fit.getLikelihood(pars)

        # pool object for returning result for multiple processes
        pool = (
            None if processes <= 1 else _MultiProcessPool(likefunc, processes))

        # for doing the mcmc sampling
        self.sampler = emcee.EnsembleSampler(
            walkers, len(fit.thawed), likefunc, pool=pool)
        # starting point
        self.pos0 = None

        # header items to write to output file
        self.header = {
            'burn': 0,
            }

    def _generateInitPars(self):
        """Generate initial set of parameters from fit."""

        thawedpars = N.array(self.fit.thawedParVals())
        assert N.all(N.isfinite(thawedpars))

        # create enough parameters with finite likelihoods
        p0 = []
        while len(p0) < self.walkers:
            p = N.random.normal(0, self.initspread, size=self.numpars) + thawedpars
            if N.isfinite(self.fit.getLikelihood(p)):
                p0.append(p)
        return p0

[docs]    def burnIn(self, length, autorefit=True, minfrac=0.2, minimprove=0.01):
        """Burn in, restarting fit and burn if necessary.

        :param bool autorefit: refit position if new minimum is found during burn in
        :param float minfrac: minimum fraction of burn in to do if new minimum found
        :param float minimprove: minimum improvement in fit statistic to do a new fit
        """

        def innerburn():
            """Returns False if new minimum found and autorefit is set"""

            bestfit = None
            bestprob = initprob = self.fit.getLikelihood(self.fit.thawedParVals())
            p0 = self._generateInitPars()

            # record period
            self.header['burn'] = length

            # iterate over burn-in period
            for i, result in enumerate(self.sampler.sample(
                    p0, iterations=length, storechain=False)):

                if i % 10 == 0:
                    uprint(' Burn %i / %i (%.1f%%)' % (
                            i, length, i*100/length))

                self.pos0, lnprob, rstate0 = result[:3]

                # new better fit
                if lnprob.max()-bestprob > minimprove:
                    bestprob = lnprob.max()
                    maxidx = lnprob.argmax()
                    bestfit = self.pos0[maxidx]

                # abort if new minimum found
                if ( autorefit and i>length*minfrac and
                     bestfit is not None ):

                    uprint('  Restarting burn as new best fit has been found '
                           ' (%g > %g)' % (bestprob, initprob) )
                    self.fit.updateThawed(bestfit)
                    self.sampler.reset()
                    return False

            self.sampler.reset()
            return True

        uprint('Burning in')
        while not innerburn():
            uprint('Restarting, as new mininimum found')
            self.fit.doFitting()

[docs]    def run(self, length):
        """Run chain.

        :param int length: length of chain
        """

        uprint('Sampling')
        self.header['length'] = length

        # initial parameters
        if self.pos0 is None:
            uprint(' Generating initial parameters')
            p0 = self._generateInitPars()
        else:
            uprint(' Starting from end of burn-in position')
            p0 = self.pos0

        # do sampling
        for i, result in enumerate(self.sampler.sample(
                p0, iterations=length)):

            if i % 10 == 0:
                uprint(' Step %i / %i (%.1f%%)' % (i, length, i*100/length))

        uprint('Done')

[docs]    def save(self, outfilename, thin=1):
        """Save chain to HDF5 file.

        :param str outfilename: output hdf5 filename
        :param int thin: save every N samples from chain
        """

        self.header['thin'] = thin

        uprint('Saving chain to', outfilename)
        with h5py.File(outfilename, 'w') as f:
            # write header entries
            for h in sorted(self.header):
                f.attrs[h] = self.header[h]

            # write list of parameters which are thawed
            f['thawed_params'] = [x.encode('utf-8') for x in self.fit.thawed]

            # output chain
            f.create_dataset(
                'chain',
                data=self.sampler.chain[:, ::thin, :].astype(N.float32),
                compression=True, shuffle=True)
            # likelihoods for each walker, iteration
            f.create_dataset(
                'likelihood',
                data=self.sampler.lnprobability[:, ::thin].astype(N.float32),
                compression=True, shuffle=True)
            # acceptance fraction
            f['acceptfrac'] = self.sampler.acceptance_fraction.astype(N.float32)
            # last position in chain
            f['lastpos'] = self.sampler.chain[:, -1, :].astype(N.float32)

        uprint('Done')