Source code for leavitt.rrlfit

import numpy as np
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
from scipy.signal import find_peaks, peak_prominences
from scipy.interpolate import interp1d
from dl import queryClient as qc
from astropy.table import Table
from collections import Counter
import statsmodels.api as sm
import os
from . import utils
from . import psearch_py3 as psearch


[docs]
def get_data(nms, bands = ['u','g','r','i','z','Y','VR']):
    """Query list of objects by name, extract light curves, 
       error, filters and top N estimated periods."""
    res = qc.query(sql="""select objectid,mjd,mag_auto,magerr_auto,filter,fwhm
                          from nsc_dr2.meas 
                          where objectid in {}""".format(tuple(nms)),fmt='table')
    res['mjd'] += np.random.randn(len(res))*1e-10
    
    selbnds = [i for i, val in enumerate(res['filter']) if val in bands]
    selfwhm = np.where(res['fwhm'] <= 4.0)[0]
    sel = [x for x in selbnds if x in selfwhm]
    res = res[sel]
    res['fltr']   = -1
    for i in range(len(res)):
        res['fltr'][i] = bands.index(res['filter'][i])

    res.rename_column('mag_auto', 'mag')
    res.rename_column('magerr_auto', 'err')
    res.sort(['fltr','mjd'])
    
    return res



[docs]
def get_periods(mjd,mag,err,fltr,objname='',outdir='results/plots',N = 10,
                pmin=.2,bands=['u','g','r','i','z','Y','VR'],verbose=False):
    
    # The filter information here uses indices determined from the order they
    # appear in bands. To run psearch we want to reassign these indices to remove
    # any unused bands. For example, if only 'g', 'r' and 'z' are used, indices
    # should be 0,1,2 and not 1,2,4.
    
    fltlist = np.array(fltr)
    
    sel1 = [i for i,val in enumerate(err) if val<.2]
    sel  = [i for i,val in enumerate(fltlist) if np.sum(fltlist[sel1]==val)>1]
    
    fltrnms = np.array(bands)[np.unique(fltr[sel])]
    mapping = {v:k for k,v in enumerate(set(fltr[sel]))}
    modflts = np.array([mapping[y] for y in fltr[sel]]).astype(np.float64)
    
    if len(fltrnms) < 1:
        raise Exception("No bands with multiple data, can't run PSearch.")
        
    print(Counter(modflts))
    dphi = 0.02
    plist, psiarray, thresh = \
            psearch.psearch_py( mjd[sel], mag[sel], err[sel], 
                                modflts, fltrnms, pmin, dphi, verbose=verbose )
    
    if len(fltrnms) > 1:
        psi = psiarray.sum(0)
    elif len(fltrnms) == 1:
        psi = psiarray
    
    pkinds = find_peaks(psi,distance=len(plist)**.45)[0]
    prom   = peak_prominences(psi,pkinds)[0]
    inds0  = pkinds[np.argsort(-prom)[:10*N]]
    inds   = inds0[np.argsort(-psi[inds0])[:N]]
    plot_periodogram(plist,psi,inds,objname,outdir)
    
    return plist, psi, inds



[docs]
def plot_periodogram(prds,psi,inds,objname='',outdir='results/plots'):
   
    fig, ax = plt.subplots(figsize=(10,7))
        
    ax.plot(prds,psi,lw=0.1)
    ax.scatter(prds[inds[1:]],psi[inds[1:]],c='k',s=10)
    ax.scatter(prds[inds[0]],psi[inds[0]],c='r',s=12)
    
    ax.set_xlabel('log period (days)',fontsize=18)
    ax.set_ylabel('psi',fontsize=18)
    ax.set_title('{} Periodogram'.format(objname),fontsize=20)
    ax.set_xscale('log')
    ax.text(0.7,0.9,'best period = {:.3f} days'.format(prds[inds[0]]),transform=ax.transAxes,color='r')
    
    fig.savefig(outdir+'\\{}_periodogram.png'.format(objname))
    
    # create zoomed in copy
    # ax.set_title('{} Periodogram Zoomed In'.format(objname),fontsize=20)
    # minp = min(prds[inds])
    # maxp = max(prds[inds])
    # ax.set_xlim(minp*.67,maxp*1.33)
    # fig.savefig(outdir+'\\{}_periodogram_zoomedin.png'.format(objname))
    
    plt.close(fig)
    return



[docs]
def rollAve(lst, k=5):
    """
    Returns the rolling average of the list,
    averaging k terms at a time.
    """
    ret = np.cumsum(lst)
    ret[k:] = ret[k:] - ret[:-k]
    
    return ret[k - 1:] / k



[docs]
def gaussAveSlow(t,y,std=.01):
    """
    Does an average in y with weights based on distances to all other points in t.
    """
    assert len(y) == len(t)
    n = len(t)
    dists = (t.reshape(n,1) - t.reshape(1,n))
    dists[dists> .5] -= 1
    dists[dists<-.5] += 1
    w = np.exp(-.5*dists**2/std**2)
    return np.sum( w*y, axis=1)/np.sum(w,axis=1)



[docs]
def gaussAve(t,y,std=0.01,nei=50):
    n = len(t)
    out = np.zeros(n,float)
    for i in range(n):
        if i-nei<0:
            t1 = t[0:i+nei]
            y1 = y[0:i+nei]
            t2 = t[(i-nei):]-1.0
            y2 = y[(i-nei):]
            t1 = np.hstack((t1,t2))
            y1 = np.hstack((y1,y2))
        elif i+nei>n:
            t1 = t[i-nei:]
            y1 = y[i-nei:]
            t2 = t[:(i+nei)-n]+1.0
            y2 = y[:(i+nei)-n]
            t1 = np.hstack((t1,t2))
            y1 = np.hstack((y1,y2))	    
        else:
            lo = np.maximum(i-nei,0)
            hi = np.minimum(i+nei,n)
            t1 = t[lo:hi]
            y1 = y[lo:hi]
        deltat = t1-t[i]
        w = np.exp(-.5*deltat**2/std**2)
        out[i] = np.sum(w*y1)/np.sum(w)
    return out




[docs]
def checkHarmonics(t,y,p,hlim=5,std=.01,fast=None):
    testp = p/np.union1d(np.arange(1,hlim+1),1/(np.arange(1,hlim+1)))
    return p_test(t,y,testp[testp>.1],std=std,fast=fast)



[docs]
def p_refine(t,y,p,pwidth=.01,N=1000,std=.01,fast=None):
    testp = np.linspace(p-p*pwidth,p+p*pwidth,N)
    testp = np.union1d(testp,p)
    return p_test(t,y,testp[testp>.1],std=std,fast=fast)



[docs]
def p_test(t,y,plst,std=.01,fast=None):
    """
    Find period from plst that produces the minimum string length.
    Gaussian weighted averages may be more accurate but will be 
    slower than the rolling average.
    """
    if fast is None:
        if 3.6e-8 * len(plst) * len(t)**2 > 75:
            fast = True
        else:
            fast = False
    mindl = 9**99
    besti = 0
    for i,prd in enumerate(plst):
        phase = t/prd %1
        mlst = y[np.argsort(phase)]
        phase.sort()
        
        if fast:
            k    = int(len(mlst)**.75*std*np.log(4)**.5+3)
            mlst = rollAve(np.append(mlst,mlst[:k-1]),k=k)
            mlst = np.roll(mlst,int(k/2))
        else:
            mlst  = gaussAve(phase,mlst,std=std)
        
        dy = np.abs( np.roll(mlst,-1) - mlst )
        dt = (np.roll(phase,-1) - phase) 
        dt[-1] += 1
        dl = (dt**2+dy**2)**.5
        if np.sum(dl) < mindl:
            mindl = np.sum(dl)
            besti = i
    return plst[besti]



[docs]
class RRLfitter:
    """
    Object used to fit templates to data. Initialize with the templates you plan
    to compare against and lists of the bands and amplitude ratios for those bands
    Templates should be in an Astropy table with columns for the phase and each
    unique template. Column names are assumed to be template names.
    """
    def __init__ (self, tmps, fltnames= ['u','g','r','i','z','Y','VR'], ampratio=[1.81480451,1.46104910,1.0,0.79662171,0.74671563,0.718746,1.050782]):
        # constants
        self.tmps     = tmps # Table containing templates
        self.fltnames = fltnames # list of names of usable filters
        self.Nflts    = len(fltnames) # number of usable filters
        self.ampratio = np.array(ampratio)
        # model variables
        self.fltinds  = [] # list of filter index values (0:'u', 1:'g', etc.)
        self.tmpind   = 1  # index of template (1,2,...,N) being used.
        self.period   = 1
        

[docs]
    def model(self, t, *args):
        """modify the template using peak-to-peak amplitude and yoffset
        input times t should be epoch folded, phase shift to match template"""
        t0 = args[0]
        amplist = (args[1] * self.ampratio)[self.fltinds]
        yofflist = np.array(args[2:])[self.fltinds]
        
        ph = (t - t0) / self.period %1
        template = interp1d(self.tmps.columns[0],self.tmps.columns[self.tmpind])(ph)
        
        mag = template * amplist + yofflist
        
        return mag



[docs]
    def tmpfit(self,mjd,mag,err,fltinds,plist,initpars=None, verbose=False):
        self.fltinds = fltinds
        if isinstance(plist, (int,float)):
            plist = [plist]
            
        if initpars is None:
            initpars = np.zeros( 2 + self.Nflts )
            initpars[0]  = min(mjd)
            ampest = []
            for f in set(fltinds):
                initpars[f+2] = min(mag[fltinds==f])
                fampest = (max(mag[fltinds==f])-min(mag[fltinds==f]))/self.ampratio[f]
                ampest.append(fampest)
                
            initpars[1]  = np.mean(ampest)
        
            if verbose:
                print("Initial Parameters:")
                print("t0:",initpars[0])
                print("r amp:",initpars[1])
                print("y offset:",initpars[2:])
        
        bounds = ( np.zeros(2+self.Nflts), np.zeros(2+self.Nflts))
        bounds[0][0] =  0.0
        bounds[1][0] = np.inf
        bounds[0][1] =  0.0
        bounds[1][1] = 50.0
        bounds[0][2:]=-50.0
        bounds[1][2:]= 50.0

        for i in set(range(self.Nflts))-set(self.fltinds):
            initpars[2+i]  =   0
            bounds[0][2+i] = -10**-6
            bounds[1][2+i] =  10**-6
        
        minx2    = 2**99
        bestpars = np.zeros( 2 + self.Nflts )
        besttmp  =-1
        besterr  = 0
        bestprd  = 0
        for p in plist:
            self.period = p
            
            for n in range(1,len(self.tmps.columns)):
                self.tmpind = n
                
                try:
                    pars, cov = curve_fit(self.model, mjd, mag, 
                                          bounds=bounds, sigma=err,
                                          p0=initpars, maxfev=9000)
                except RuntimeError:
                    continue
                    
                x2 = sum((self.model(mjd,*pars)-mag)**2/err**2)
                if x2 < minx2:
                    minx2 = x2
                    bestpars = pars
                    besterr = np.sqrt(np.diag(cov))
                    bestprd = p
                    besttmp = n
                    
        self.period = bestprd
        self.tmpind = besttmp
        
        if verbose:
            print("Results: ")
            print("t0:",bestpars[0])
            print("r amp:",bestpars[1])
            print("y offset:",bestpars[2:])
            print("Period:",bestprd)
            print("Best Template:",self.tmps.colnames[besttmp])
            print("Chi Square:",minx2)
            
        return bestpars, bestprd, besterr, besttmp, minx2




[docs]
def fit_plot(fitter,objname,cat=None,plist=None,N=10,verbose=False,dirpath=''):

    if not os.path.exists(dirpath):
        os.makedirs(dirpath)
    path = dirpath

    
    if cat is None:
        if verbose:
            print('Get data')
        cat = get_data(objname,bands=fitter.fltnames)
    if plist is None:
        if verbose:
            print('Get Periods')
        ps,psi,inds  = get_periods(cat['mjd'],cat['mag'],cat['err'],cat['fltr'],
                             objname=objname,bands=fitter.fltnames,N=N)
        plist = ps[inds]
    if verbose:
        print('First Fit')
    # Fit curve
    pars,p,err,tmpind,chi2 = fitter.tmpfit(cat['mjd'],cat['mag'],cat['err'],cat['fltr'],plist,verbose=verbose)
    
    if verbose:
        print(pars)
        print('Outlier Rejection')   
    # Reject outliers, select inliers
    resid   = np.array(cat['mag']-fitter.model(cat['mjd'],*pars))
    cat['inlier'] = abs(resid)<utils.mad(resid)*5
    
    if verbose:
        print(sum(~cat['inlier']),'outliers rejected')
        print('Second Fit')
    # Fit with inliers only
    pars,p,err,tmpind,chi2 = fitter.tmpfit(cat['mjd'][cat['inlier']],cat['mag'][cat['inlier']],
                                         cat['err'][cat['inlier']],cat['fltr'][cat['inlier']],plist,pars,verbose=verbose)
    
    redchi2 = chi2/(sum(cat['inlier'])-len(set(cat['fltr'][cat['inlier']]))-2)
    if verbose:
        print(pars)
    # get the filters with inlier data (incase it's different from all data)
    inlierflts = set(cat['fltr'][cat['inlier']])
    
    # Add phase to cat and sort
    cat['ph'] = (cat['mjd'] - pars[0]) / p %1
    cat.sort(['fltr','ph'])
    fitter.fltinds = cat['fltr']
    
    if verbose:
        print('Start Plotting')
    # Plot
    colors  = ['#1f77b4','#2ca02c','#d62728','#9467bd','#8c564b','y','#ff7f0e']
    nf      = len(inlierflts) # Number of filters with inliers
    fig, ax = plt.subplots(nf, figsize=(12,4*(nf**.75+1)), sharex=True)
    if nf == 1:
        ax  = [ax]
        
    for i,f in enumerate(inlierflts):
        sel    = cat['fltr'] == f
        ax[i].scatter(cat['ph'][sel],cat['mag'][sel],c=colors[f])
        ax[i].scatter(cat['ph'][sel]+1,cat['mag'][sel],c=colors[f])
        tmpmag = np.tile(fitter.tmps.columns[tmpind]*pars[1]*fitter.ampratio[f]+pars[2:][f],2)
        tmpph  = np.tile(fitter.tmps['PH'],2)+([0]*len(fitter.tmps['PH'])+[1]*len(fitter.tmps['PH']))
        ax[i].plot(tmpph,tmpmag,c='k')
        xsel   = sel*(~cat['inlier'])
        ax[i].scatter(cat['ph'][xsel],cat['mag'][xsel],c='k',marker='x')
        ax[i].scatter(cat['ph'][xsel]+1,cat['mag'][xsel],c='k',marker='x')
        ax[i].invert_yaxis()
        ax[i].set_ylabel(fitter.fltnames[f], fontsize=20)
    
    ax[-1].set_xlabel('Phase', fontsize=20)
    ax[0].set_title("Object: {}    Period: {:.3f} d    Type: {}".format(
                                        objname,p,fitter.tmps.colnames[tmpind]), fontsize=22)
    
    path = dirpath  + '{}_plot.png'.format(objname)
    fig.savefig(path)
    if verbose:
        print('Saved to',path)
        print('Save parameters')
    plt.close(fig)
    
    # save parameters and results
    res = Table([[objname]],names=['name'])
    res['period'] = p
    res['t0']     = pars[0]
    res['r amp']  = pars[1]
    for i in range(2,len(pars)):
        f = fitter.fltnames[i-2]
        res['{} mag'.format(f)] = pars[i]
    res['chi2']   = chi2
    res['redchi2']= redchi2
    res['template']= fitter.tmps.colnames[tmpind]
    res['t0 err']     = err[0]
    res['amp err']  = err[1]
    for i in range(2,len(err)):
        f = fitter.fltnames[i-2]
        res['{} mag err'.format(f)] = err[i]
    res['Ndat']      = len(cat)
    res['N inliers'] = sum(cat['inlier'])
    for i in range(len(fitter.fltnames)):
        f = fitter.fltnames[i]
        res['N {}'.format(f)] = sum(cat['fltr'][cat['inlier']]==i)
    
    path = dirpath  + '{}_res.fits'.format(objname)

    res.write(path,format='fits',overwrite=True)
    
    if verbose:
        print('Saved to',path)
        print('end')
    return
Navigation

Source code for leavitt.rrlfit