Source code for convoys.single

from deprecated.sphinx import deprecated
import numpy
from scipy.special import expit, logit
import scipy.stats
import warnings

# Public API of this module: a star-import exports only KaplanMeier.
__all__ = ['KaplanMeier']


class SingleModel:
    ''' Common base class of the models defined in this module.

    It is an empty placeholder for now: it declares no attributes and no
    methods of its own.
    '''
    # TODO


class KaplanMeier(SingleModel):
    ''' Implementation of the Kaplan-Meier nonparametric method. '''

    def fit(self, B, T):
        ''' Fits the model

        Observations with ``T < 0`` or with ``B`` outside ``[0, 1]`` are
        dropped, and a warning reporting how many were removed is emitted.

        After fitting, three parallel lists describe the step function:
        ``_ts`` (times, starting at 0), ``_ss`` (survival estimate after
        each time) and ``_vs`` (variance of ``log(-log(S))`` at each time).

        :param B: numpy vector of shape :math:`n`; each value is used as the
            event count of its observation (the warning text describes the
            valid values as 0/1)
        :param T: numpy vector of shape :math:`n` of non-negative times
        '''
        # See https://www.math.wustl.edu/~sawyer/handouts/greenwood.pdf
        BT = [(b, t) for b, t in zip(B, T)
              if t >= 0 and 0 <= float(b) <= 1]
        if len(BT) < len(B):
            n_removed = len(B) - len(BT)
            warnings.warn('Warning! Removed %d/%d entries from inputs where '
                          'T < 0 or B not 0/1' % (n_removed, len(B)))

        # n is the number of observations still at risk; it shrinks by one
        # for every observation processed in time order.
        n = len(BT)
        self._ts = [0.0]
        self._ss = [1.0]
        self._vs = [0.0]
        sum_var_terms = 0.0
        prod_s_terms = 1.0
        for t, b in sorted((t, b) for b, t in BT):
            d = float(b)
            self._ts.append(t)
            prod_s_terms *= 1 - d/n
            self._ss.append(prod_s_terms)
            if d == n == 1:
                # Only possible for the very last observation: the survival
                # estimate is exactly 0, so log(-log(S)) has no finite
                # variance.  Store NaN directly instead of evaluating
                # log(0) and 0*inf, which only produced RuntimeWarnings.
                # (predict/predict_ci never read the last entry.)
                self._vs.append(float('nan'))
            else:
                sum_var_terms += d / (n*(n-d))
                if sum_var_terms > 0:
                    self._vs.append(
                        1 / numpy.log(prod_s_terms)**2 * sum_var_terms)
                else:
                    # No event seen yet: the estimate is still exactly 1.
                    self._vs.append(0)
            n -= 1

        # Just prevent overflow warning when computing the confidence interval
        eps = 1e-9
        self._ss_clipped = numpy.clip(self._ss, eps, 1.0-eps)

    @staticmethod
    def _step_index(ts, value):
        ''' Returns the index of the last entry of ``ts`` that is <= value.

        The result is clamped to 0 so that a value smaller than every fitted
        time maps to the initial step instead of the index -1, which would
        silently address the *last* step of the fitted lists.
        '''
        return max(numpy.searchsorted(ts, value, side='right') - 1, 0)

    def predict(self, t):
        '''Returns the predicted values.

        :param t: scalar or array-like of times
        :returns: numpy array with the shape of ``t`` holding one minus the
            survival estimate at each time.  Entries are NaN for times at or
            after the largest fitted time, and 0 for negative times.
        '''
        t = numpy.array(t)
        res = numpy.zeros(t.shape)
        # Convert once instead of once per queried value.
        ts = numpy.asarray(self._ts)
        last = len(ts) - 1
        for indexes, value in numpy.ndenumerate(t):
            j = self._step_index(ts, value)
            if j >= last:
                # Make the plotting stop at the last value of t
                res[indexes] = float('nan')
            else:
                res[indexes] = 1 - self._ss[j]
        return res

    def predict_ci(self, t, ci=0.8):
        '''Returns the predicted values with a confidence interval.

        The interval is built on the ``log(-log(S))`` scale from the
        variances computed by :meth:`fit`.

        :param t: scalar or array-like of times
        :param ci: confidence level of the interval (default 0.8)
        :returns: numpy array of shape ``t.shape + (3,)``.  The last axis
            holds, in this order: the point estimate, the bound computed
            with the upper normal quantile (upper bound) and the bound
            computed with the lower normal quantile (lower bound).  All
            three are NaN for times at or after the largest fitted time.
            Negative times use the initial step; the bounds are computed
            from the survival clipped away from 0 and 1, so where the
            estimate is exactly 0 they are about 1e-9.
        '''
        t = numpy.array(t)
        res = numpy.zeros(t.shape + (3,))
        # Neither the quantiles nor the time array depend on the queried
        # value, so compute them once.
        z_lo, z_hi = scipy.stats.norm.ppf([(1-ci)/2, (1+ci)/2])
        ts = numpy.asarray(self._ts)
        last = len(ts) - 1
        for indexes, value in numpy.ndenumerate(t):
            j = self._step_index(ts, value)
            if j >= last:
                # Make the plotting stop at the last value of t
                res[indexes] = [float('nan')]*3
            else:
                log_log_s = numpy.log(-numpy.log(self._ss_clipped[j]))
                std = self._vs[j]**0.5
                res[indexes] = (
                    1 - self._ss[j],
                    1 - numpy.exp(-numpy.exp(log_log_s + z_hi * std)),
                    1 - numpy.exp(-numpy.exp(log_log_s + z_lo * std))
                    )
        return res

    @deprecated(version='0.2.0',
                reason='Use :meth:`predict` or :meth:`predict_ci` instead.')
    def cdf(self, t, ci=None):
        '''Returns the predicted values.

        :param t: scalar or array-like of times
        :param ci: ``None`` to get the point estimates of :meth:`predict`;
            otherwise the confidence level forwarded to :meth:`predict_ci`.
            A boolean is treated as a plain on/off flag and selects the
            default level of :meth:`predict_ci`, as before.
        '''
        if ci is None:
            return self.predict(t)
        if isinstance(ci, bool):
            # Flag-style call: keep the historical default confidence level
            # rather than interpreting True/False as the level 1/0.
            return self.predict_ci(t)
        return self.predict_ci(t, ci=ci)