Source code for frb.surveys.catalog_utils

""" Methods related to fussing with a catalog"""
from astropy.table.table import QTable
import numpy as np

from astropy.coordinates import SkyCoord
from astropy.table import Table, hstack, vstack, setdiff, join
from astropy import units
from frb.galaxies.defs import valid_filters



[docs]
def clean_heasarc(catalog):
    """
    Insure RA/DEC are ra/dec in the Table

    Table is modified in place

    Args:
        catalog (astropy.table.Table): Catalog generated by astroquery

    """
    # RA/DEC
    catalog.rename_column("RA", "ra")
    catalog.rename_column("DEC", "dec")
    for key in ['ra', 'dec']:
        catalog[key].unit = units.deg




[docs]
def clean_cat(catalog, pdict, fill_mask=None):
    """
    Convert table column names intrinsic to the slurped
    catalog with the FRB survey desired values

    Args:
        catalog (astropy.table.Table): Catalog generated by astroquery
        pdict (dict):  Defines the original key and desired key
        fill_mask (int or float, optional):  Fill masked items with this value

    Returns:
        astropy.table.Table:  modified catalog

    """
    for key,value in pdict.items():
        if value in catalog.keys():
            catalog.rename_column(value, key)
    # Mask
    if fill_mask is not None:
        if catalog.mask is not None:
            catalog = catalog.filled(fill_mask)
    return catalog




[docs]
def sort_by_separation(catalog, coord, radec=('ra','dec'), add_sep=True):
    """
    Sort an input catalog by separation from input coordinate

    Args:
        catalog (astropy.table.Table):  Table of sources
        coord (astropy.coordinates.SkyCoord): Reference coordinate for sorting
        radec (tuple): Defines catalog columns holding RA, DEC (in deg)
        add_sep (bool, optional): Add a 'separation' column with units of arcmin

    Returns:
        astropy.table.Table: Sorted catalog

    """
    # Check
    for key in radec:
        if key not in catalog.keys():
            print("RA/DEC key: {:s} not in your Table".format(key))
            raise IOError("Try again..")
    # Grab coords
    cat_coords = SkyCoord(ra=catalog[radec[0]].data,
                          dec=catalog[radec[1]].data, unit='deg')

    # Separations
    seps = coord.separation(cat_coords)
    isrt = np.argsort(seps)
    # Add?
    if add_sep:
        catalog['separation'] = seps.to('arcmin')
    # Sort
    srt_catalog = catalog[isrt]
    # Return
    return srt_catalog




[docs]
def match_ids(IDs, match_IDs, require_in_match=True):
    """ Match input IDs to another array of IDs (usually in a table)
    Return the rows aligned with input IDs

    Args:
        IDs (ndarray): ID values to match
        match_IDs (ndarray):  ID values to match to
        require_in_match (bool, optional): Require that each of the
          input IDs occurs within the match_IDs

    Returns:
        ndarray: Rows in match_IDs that match to IDs, aligned -1 if there is no match

    """
    rows = -1 * np.ones_like(IDs).astype(int)
    # Find which IDs are in match_IDs
    in_match = np.in1d(IDs, match_IDs)
    if require_in_match:
        if np.sum(~in_match) > 0:
            raise IOError("qcat.match_ids: One or more input IDs not in match_IDs")
    rows[~in_match] = -1
    #
    IDs_inmatch = IDs[in_match]
    # Find indices of input IDs in meta table -- first instance in meta only!
    xsorted = np.argsort(match_IDs)
    ypos = np.searchsorted(match_IDs, IDs_inmatch, sorter=xsorted)
    indices = xsorted[ypos]
    rows[in_match] = indices
    return rows




[docs]
def summarize_catalog(frbc, catalog, summary_radius, photom_column, magnitude):
    """
    Generate simple text describing the sources from
    an input catalog within a given radius

    Args:
        frbc: FRB Candidate object
        catalog (astropy.table.Table): Catalog table
        summary_radius (Angle):  Radius to summarize on
        photom_column (str): Column specifying which flux to work on
        magnitude (bool): Is the flux a magnitude?

    Returns:
        list: List of comments on the catalog

    """
    # Init
    summary_list = []
    coords = SkyCoord(ra=catalog['ra'], dec=catalog['dec'], unit='deg')
    # Find all within the summary radius
    seps = frbc['coord'].separation(coords)
    in_radius = seps < summary_radius
    # Start summarizing
    summary_list += ['{:s}: There are {:d} source(s) within {:0.1f} arcsec'.format(
        catalog.meta['survey'], np.sum(in_radius), summary_radius.to('arcsec').value)]
    # If any found
    if np.any(in_radius):
        # Brightest
        if magnitude:
            brightest = np.argmin(catalog[photom_column][in_radius])
        else:
            brightest = np.argmax(catalog[photom_column][in_radius])
        summary_list += ['{:s}: The brightest source has {:s} of {:0.2f}'.format(
            catalog.meta['survey'], photom_column,
            catalog[photom_column][in_radius][brightest])]
        # Closest
        closest = np.argmin(seps[in_radius])
        summary_list += ['{:s}: The closest source is at separation {:0.2f} arcsec and has {:s} of {:0.2f}'.format(
            catalog.meta['survey'],
            seps[in_radius][closest].to('arcsec').value,
            photom_column, catalog[photom_column][in_radius][brightest])]
    # Return
    return summary_list




[docs]
def xmatch_catalogs(cat1:Table, cat2:Table, dist:units.Quantity = 5*units.arcsec,
                     RACol1:str = "ra", DecCol1:str = "dec",
                     RACol2:str = "ra", DecCol2:str = "dec",
                     distcol1:str = None, distcol2:str = None,
                     return_match_idx:bool=False)->tuple:
    """
    Cross matches two astronomical catalogs and returns
    the matched tables.
    Args:
        cat1, cat2: astropy Tables
            Two tables with sky coordinates to be
            matched.
        dist: astropy Quantity, optional
            Maximum separation for a valid match.
            5 arcsec by default. Can be length units
            if distcol1 and distcol2 are provided.
        RACol1, RACol2: str, optional
            Names of columns in cat1 and cat2
            respectively that contain RA in degrees.
        DecCol1, DecCol2: str, optional
            Names of columns in cat1 and cat2
            respectively that contain Dec in degrees.
        distcol1, distcol2: str, optional
            Names of columns in cat1 and cat2
            respectively that contain the radial distance
            as floats in Mpc. If None, 2D cross-matches
            are returned.
        return_match_idx: bool, optional
            Return the indices of the matched entries with
            with the distance instead?
    returns:
        match1, match2: astropy Table
            Tables of matched rows from cat1 and cat2.
        idx, d2d (if return_match_idx): ndarrays
            Indices of matched entries from table 2
            and an array of separations to go with.
    """
    assert isinstance(cat1, (Table, QTable))&isinstance(cat1, (Table, QTable)), "Catalogs must be astropy Table instances."
    assert (RACol1 in cat1.colnames)&(DecCol1 in cat1.colnames), " Could not find either {:s} or {:s} in cat1".format(RACol1, DecCol1)
    assert (RACol2 in cat2.colnames)&(DecCol2 in cat2.colnames), " Could not find either {:s} or {:s} in cat2".format(RACol2, DecCol2)
    do_3d = (distcol1 is not None)&(distcol2 is not None)
    if do_3d:
        assert (distcol1 in cat1.colnames)&(distcol2 in cat2.colnames), "Could not find either {:s} or {:s} in cat1".format(distcol1, distcol2)
        dist1 = cat1[distcol1]*units.Mpc
        dist2 = cat2[distcol2]*units.Mpc
    else:
        dist1 = None
        dist2 = None
    # Get corodinates
    cat1_coord = SkyCoord(cat1[RACol1]*units.deg, cat1[DecCol1]*units.deg, distance=dist1)
    cat2_coord = SkyCoord(cat2[RACol2]*units.deg, cat2[DecCol2]*units.deg, distance=dist2)

    # Match 2D
    idx, d2d, d3d = cat1_coord.match_to_catalog_sky(cat2_coord)

    # Get matched tables
    if do_3d:
        separation = d3d
    else:
        separation = d2d
    match1 = cat1[separation < dist]
    match2 = cat2[idx[separation < dist]]

    if return_match_idx:
        return idx, d2d, d3d
    else:
        return match1, match2



def _detect_mag_cols(photometry_table):
    """
    Searches the column names of a 
    photometry table for columns with mags.

    Args:
        photometry_table: astropy Table
            A table containing photometric
            data from a catlog.
    Returns:
        mag_colnames: list
            A list of column names with magnitudes
        mag_err_colnames: list
            A list of column names with errors
            in the magnitudes.
    """
    assert type(photometry_table)==Table, "Photometry table must be an astropy Table instance."
    allcols = photometry_table.colnames
    photom_cols = np.array(valid_filters)
    photom_errcols = np.array([filt+"_err" for filt in photom_cols])

    photom_cols = photom_cols[[elem in allcols for elem in photom_cols]]
    photom_errcols = photom_errcols[[elem in allcols for elem in photom_errcols]]
    
    return photom_cols.tolist(), photom_errcols.tolist()



[docs]
def mag_from_flux(flux, flux_err=None):
    """
    Get the AB magnitude from a flux

    Parameters
    ----------
    flux : Quantity
        Flux
    flux_err : Quantity
        Error in flux (optional)

    Returns
    -------
    mag, mag_err : float, float
        AB magnitude and its error (if flux_err is given)
        AB magnitude and `None` (if flux_err is `None`)
    """
    # convert flux to Jansky
    flux_Jy = flux.to('Jy').value

    # get mag
    mag_AB = -2.5*np.log10(flux_Jy) + 8.9

    # get error
    if flux_err is not None:
        flux_Jy_err = flux_err.to('Jy').value
        err_mag2 = (-2.5/np.log(10.) / flux_Jy)**2 * flux_Jy_err**2
        err_mag = np.sqrt(err_mag2)
    else:
        err_mag = None
    return mag_AB, err_mag


def _mags_to_flux(mag, zpt_flux:units.Quantity=3630.7805*units.Jy, mag_err=None,
                  exact_mag_err=False):
    """
    Convert a magnitude to mJy

    Args:
        mag (column): magnitude
        zpt_flux (Quantity, optional): Zero point flux for the magnitude.
            Assumes AB mags by default (i.e. zpt_flux = 3630.7805 Jy). 
        mag_err (float, optional): uncertainty in magnitude
        exact_mag_err (bool, optional): True if you are aware that the mag
            error were estimated exactly and not using a first order (in SNR)
            approximation. 
    Returns:
        flux (column): flux in mJy
        flux_err (column): if mag_err is given, a corresponding
            flux_err is returned.
    """
    # Data validation -- check for Jy
    assert (type(zpt_flux) == units.Quantity)*(zpt_flux.decompose().unit == units.kg/units.s**2), "zpt_flux units should be Jy or with dimensions kg/s^2."

    # Prepare output column
    flux = mag.copy()

    # Convert fluxes
    badmags = mag<-10
    flux[badmags] = -99.
    flux[~badmags] = zpt_flux.value*10**(-mag[~badmags]/2.5)
    
    if mag_err is not None:
        flux_err = mag_err.copy()
        baderrs = (mag_err < 0) | (mag_err == 999.)
        flux_err[baderrs] = -99.
        if exact_mag_err:
            flux_err[~baderrs] = flux[~baderrs]*(10**(mag_err[~baderrs]/2.5)-1) # exact error
        else:
            flux_err[~baderrs] = np.log(10)/2.5*flux[~baderrs]*mag_err[~baderrs] # first order approximation
        return flux, flux_err
    else:
        return flux    


[docs]
def convert_mags_to_flux(photometry_table, fluxunits='mJy', exact_mag_err=False):
    """
    Takes a table of photometric measurements
    in mags and converts it to flux units.

    ..todo..   NEED TO ADD DOCS ON VISTA, ETC..

    Args:
        photometry_table (astropy.table.Table):
            A table containing photometric
            data from a catlog.
        fluxunits (str, optional):
            Flux units to convert the magnitudes
            to, as parsed by astropy.units. Default is mJy.
        exact_mag_err (bool, optional):
            Use if you know that the mag errors were estimated
            exactly as opposed to the first-order approximation
            that is usually quoted.

    Returns:
        fluxtable: astropy Table
                `photometry_table` but the magnitudes
                are converted to fluxes.
                For upper limits, the flux is the 3sigma value and
                the error is set to -99.
    """
    fluxtable = photometry_table.copy()
    # Find columns with magnitudes based on filter names
    mag_cols, mag_errcols = _detect_mag_cols(fluxtable)
    convert = units.Jy.to(fluxunits)

    for mag, err in zip(mag_cols, mag_errcols):
        flux, flux_err = _mags_to_flux(photometry_table[mag], 
                                       mag_err=photometry_table[err],
                                       exact_mag_err=exact_mag_err)

        # Allow for bad flux values
        badflux = flux == -99.
        fluxtable[mag][badflux] = flux[badflux]
        fluxtable[mag][~badflux] = flux[~badflux]*convert

        # Allow for bad errors
        baderr = flux_err == -99.0
        fluxtable[err][baderr] = flux_err[baderr]
        fluxtable[err][~baderr] = flux_err[~baderr]*convert

        # Upper limits -- Record as 3sigma
        #   and set error to -99.
        uplimit = photometry_table[err] == 999.
        fluxtable[err][uplimit] = -99. #fluxtable[mag][uplimit] / 3.
        fluxtable[mag][uplimit] = fluxtable[mag][uplimit] 

    return fluxtable



[docs]
def remove_duplicates(tab:Table, idcol:str)->Table:
    """
    In an astropy table if there are duplicate
    entries, remove the duplicates. Generally,
    these will be duplicate objects (i.e. multiple
    observations of same object ID or the same
    entry repeated multiple times from cross-matching.)

    Args:
        tab (Table): A table of entries.
        idcol (str): A column name that has unique ids
            for each table entry.
    Returns:
        unique_tab (Table): A table with only the unique ids.
    """
    unique_tab = tab.copy()
    assert isinstance(unique_tab, Table), "Please provide an astropy table."
    assert isinstance(idcol, str), "Please provide a valid column name."
    assert idcol in tab.colnames, "{} not a column in the given table".format(idcol)
    # Sort entries first.
    unique_tab.sort(idcol)
    # Get the duplicates.
    duplicate_ids = np.where(unique_tab[1:][idcol]==unique_tab[:-1][idcol])[0]+1
    unique_tab.remove_rows(duplicate_ids)
    return unique_tab

    

[docs]
def xmatch_and_merge_cats(tab1:Table, tab2:Table, tol:units.Quantity=1*units.arcsec,
                        table_names:tuple=('1','2'), **kwargs)->Table:
    """
    Given two source catalogs, cross-match and merge them. This function 
    ensures there is a unique match between tables as opposed to the default join_skycoord
    behavior which matches multiple objects on the right table to
    a source on the left. The two tables must contain the columns 'ra' and 'dec' (case-sensitive).
    Args:
        tab1, tab2 (Table): Photometry catalogs. Must contain columns named
            ra and dec.
        tol (Quantity[Angle], optional): Maximum separation for cross-matching.
        table_names (tuple of str, optional): Names of the two tables for
            naming unique columns in the merged table.
        kwargs: Additional keyword arguments to be passed onto xmatch_catalogs
    Returns:
        merged_table (Table): Merged catalog.
    """
    if table_names is not None:
        assert len(table_names)==2, "Invalid number of table names for two tables."
        assert (type(table_names[0])==str)&(type(table_names[1])==str), "Table names should be strings."
    
    assert np.all(np.isin(['ra','dec'],tab1.colnames)), "Table 1 doesn't have column 'ra' and/or 'dec'."
    assert np.all(np.isin(['ra','dec'],tab2.colnames)), "Table 2 doesn't have column 'ra' and/or 'dec'."

    # Edge cases: zero length tables
    if len(tab1) == 0:
        return tab2.copy()
    elif len(tab2) == 0:
        return tab1.copy()

    # Cross-match tables for tab1 INTERSECTION tab2.
    matched_tab1, matched_tab2 = xmatch_catalogs(tab1, tab2, tol, **kwargs)

    # tab1 INTERSECTION tab2
    inner_join = hstack([matched_tab1, matched_tab2],
                        table_names=table_names)
    # Remove unnecessary ra/dec columns and rename remaining coordinate
    # columns corectly. 
    tab1_coord_cols = ['ra_'+table_names[0],"dec_"+table_names[0]]
    tab2_coord_cols = ['ra_'+table_names[1],"dec_"+table_names[1]]


    inner_join.remove_columns(tab2_coord_cols)
    inner_join.rename_columns(tab1_coord_cols, ['ra', 'dec'])

    # Now get all objects that weren't matched.
    if len(matched_tab1) == 0:
        not_matched_tab1 = tab1
        not_matched_tab2 = tab2
    else:
        not_matched_tab1 = tab1[~np.isin(tab1, matched_tab1)]
        not_matched_tab2 = tab2[~np.isin(tab2, matched_tab2)]

    # (tab1 UNION tab2) - (tab1 INTERSECTION tab2)

    # Are there unmatched entries in both tables?
    if (len(not_matched_tab1)!=0)&(len(not_matched_tab2)!=0):
        outer_join = join(not_matched_tab1, not_matched_tab2,
                    keys=['ra','dec'], join_type='outer', table_names=table_names)
        merged = vstack([inner_join, outer_join]).filled(999.)
    # Only table 1 has unmatched entries?
    elif (len(not_matched_tab1)!=0)&(len(not_matched_tab2)==0):
        merged = vstack([inner_join, not_matched_tab1])
    # Only table 2?
    elif (len(not_matched_tab1)==0)&(len(not_matched_tab2)!=0):
        merged = vstack([inner_join, not_matched_tab2])
    # Neither?
    else:
        merged = inner_join
    # Final cleanup. Just in case.
    weird_cols = np.isin(['ra_1','dec_1','ra_2','dec_2'],merged.colnames)
    if np.any(weird_cols):
        merged.remove_columns(np.array(['ra_1','dec_1','ra_2','dec_2'])[weird_cols])
    # Fill and return.
    return merged.filled(999.)
    
    '''
    TODO: Write this function once CDS starts working again (through astroquery) 
    def xmatch_gaia(catalog,max_sep = 5*u.arcsec,racol='ra',deccol='dec'):
        """
        Cross match against Gaia DR2
        and return the cross matched table.
        Args:
            max_sep (Angle): maximum separation to be
                            considered a valid match.
        Returns:
            xmatch_tab (Table): a table with corss matched
                                entries.
        """
    '''