Full-Day Autocorrelation Checking¶

by Josh Dillon and Steven Murray, last updated February 13, 2024

This notebook is designed to assess per-day data quality from just autocorrelations, enabling a quick assessment of whether the day is worth pushing through further analysis. In particular, it is designed to look for times that are particularly contaminated by broadband RFI (e.g. from lightning), picking out fraction of days worth analyzing. It's output is a an a priori flag yaml file readable by hera_qm.metrics_io functions read_a_priori_chan_flags(), read_a_priori_int_flags(), and read_a_priori_ant_flags().

Here's a set of links to skip to particular figures:

• Figure 1: Preliminary Array Flag Fraction Summary¶

• Figure 2: z-Score of DPSS-Filtered, Averaged Good Autocorrelation and Initial Flags¶

• Figure 3: Proposed A Priori Time Flags Based on Frequency-Averaged and Convolved z-Score Magnitude¶

In [1]:
import time
tstart = time.time()
In [2]:
import os
os.environ['HDF5_USE_FILE_LOCKING'] = 'FALSE'
import h5py
import hdf5plugin  # REQUIRED to have the compression plugins available
import numpy as np
import pandas as pd
import glob
import os
import matplotlib.pyplot as plt
import matplotlib
import copy
from scipy.ndimage import convolve
from hera_cal import io, utils
from hera_cal.smooth_cal import dpss_filters, solve_2D_DPSS
from hera_qm import ant_class, xrfi, metrics_io
from hera_qm.time_series_metrics import true_stretches, impose_max_flag_gap, metric_convolution_flagging
from hera_filters import dspec
import warnings

from IPython.display import display, HTML
%matplotlib inline
display(HTML("<style>.container { width:100% !important; }</style>"))
_ = np.seterr(all='ignore')  # get rid of red warnings
%config InlineBackend.figure_format = 'retina'

Parse input and output files¶

In [3]:
# get filenames
AUTO_FILE = os.environ.get("AUTO_FILE", None)
# AUTO_FILE = '/mnt/sn1/data2/2460458/zen.2460458.16881.sum.uvh5'

SUM_AUTOS_SUFFIX = os.environ.get("SUM_AUTOS_SUFFIX", 'sum.autos.uvh5')
DIFF_AUTOS_SUFFIX = os.environ.get("DIFF_AUTOS_SUFFIX", 'diff.autos.uvh5')
OUT_YAML_SUFFIX = os.environ.get("OUT_YAML_SUFFIX", '_apriori_flags.yaml')
OUT_YAML_DIR = os.environ.get("OUT_YAML_DIR", None)
# OUT_YAML_DIR = '/lustre/aoc/projects/hera/jsdillon/H6C/'

if OUT_YAML_DIR is None:
    OUT_YAML_DIR = os.path.dirname(AUTO_FILE)

auto_sums_glob = '.'.join(AUTO_FILE.split('.')[:-4]) + '.*.' + SUM_AUTOS_SUFFIX
auto_diffs_glob = auto_sums_glob.replace(SUM_AUTOS_SUFFIX, DIFF_AUTOS_SUFFIX)
out_yaml_file = os.path.join(OUT_YAML_DIR, AUTO_FILE.split('.')[-5] + OUT_YAML_SUFFIX)
In [4]:
auto_sums = sorted(glob.glob(auto_sums_glob))
print(f'Found {len(auto_sums)} *.{SUM_AUTOS_SUFFIX} files starting with {auto_sums[0]}.')

auto_diffs = sorted(glob.glob(auto_diffs_glob))
print(f'Found {len(auto_diffs)} *.{DIFF_AUTOS_SUFFIX} files starting with {auto_diffs[0]}.')
Found 174 *.sum.autos.uvh5 files starting with /mnt/sn1/data1/2460805/zen.2460805.21081.sum.autos.uvh5.
Found 174 *.diff.autos.uvh5 files starting with /mnt/sn1/data1/2460805/zen.2460805.21081.diff.autos.uvh5.

Parse settings¶

In [5]:
# bounds on zeros in spectra
good_zeros_per_eo_spectrum = (0, int(os.environ.get("MAX_ZEROS_PER_EO_SPEC_GOOD", 2)))
suspect_zeros_per_eo_spectrum = (0, int(os.environ.get("MAX_ZEROS_PER_EO_SPEC_SUSPECT", 8)))

# bounds on autocorrelation power
auto_power_good = (float(os.environ.get("AUTO_POWER_GOOD_LOW", 5)), float(os.environ.get("AUTO_POWER_GOOD_HIGH", 30)))
auto_power_suspect = (float(os.environ.get("AUTO_POWER_SUSPECT_LOW", 1)), float(os.environ.get("AUTO_POWER_SUSPECT_HIGH", 60)))

# bounds on autocorrelation slope
auto_slope_good = (float(os.environ.get("AUTO_SLOPE_GOOD_LOW", -0.4)), float(os.environ.get("AUTO_SLOPE_GOOD_HIGH", 0.4)))
auto_slope_suspect = (float(os.environ.get("AUTO_SLOPE_SUSPECT_LOW", -0.6)), float(os.environ.get("AUTO_SLOPE_SUSPECT_HIGH", 0.6)))

# bounds on autocorrelation RFI
auto_rfi_good = (0, float(os.environ.get("AUTO_RFI_GOOD", 1.5)))
auto_rfi_suspect = (0, float(os.environ.get("AUTO_RFI_SUSPECT", 2)))

# bounds on autocorrelation shape
auto_shape_good = (0, float(os.environ.get("AUTO_SHAPE_GOOD", 0.1)))
auto_shape_suspect = (0, float(os.environ.get("AUTO_SHAPE_SUSPECT", 0.2)))

# parse RFI settings for antenna flagging
RFI_DPSS_HALFWIDTH = float(os.environ.get("RFI_DPSS_HALFWIDTH", 300e-9)) # in s
RFI_NSIG = float(os.environ.get("RFI_NSIG", 6))

# parse settings for identifying mislabeled data by X-engine
BAD_XENGINE_ZCUT = float(os.environ.get("BAD_XENGINE_ZCUT", 10))

# DPSS settings for full-day filtering
FREQ_FILTER_SCALE = float(os.environ.get("FREQ_FILTER_SCALE", 5.0)) # in MHz
TIME_FILTER_SCALE = float(os.environ.get("TIME_FILTER_SCALE", 450.0)) # in s
EIGENVAL_CUTOFF = float(os.environ.get("EIGENVAL_CUTOFF", 1e-12))

# A priori flag settings
FM_LOW_FREQ = float(os.environ.get("FM_LOW_FREQ", 87.5)) # in MHz
FM_HIGH_FREQ = float(os.environ.get("FM_HIGH_FREQ", 108.0)) # in MHz
FM_freq_range = [FM_LOW_FREQ * 1e6, FM_HIGH_FREQ * 1e6]
MAX_SOLAR_ALT = float(os.environ.get("MAX_SOLAR_ALT", 0.0)) # in degrees
SMOOTHED_ABS_Z_THRESH = float(os.environ.get("SMOOTHED_ABS_Z_THRESH", 10))
WHOLE_DAY_FLAG_THRESH = float(os.environ.get("WHOLE_DAY_FLAG_THRESH", 0.5))

for setting in ['good_zeros_per_eo_spectrum', 'suspect_zeros_per_eo_spectrum', 'auto_power_good', 'auto_power_suspect', 
                'auto_slope_good', 'auto_slope_suspect', 'auto_rfi_good', 'auto_rfi_suspect',
                'auto_shape_good', 'auto_shape_suspect', 'BAD_XENGINE_ZCUT', 'RFI_DPSS_HALFWIDTH', 'RFI_NSIG',
                'FREQ_FILTER_SCALE', 'TIME_FILTER_SCALE', 'EIGENVAL_CUTOFF', 'FM_freq_range',
                'MAX_SOLAR_ALT', 'SMOOTHED_ABS_Z_THRESH', 'WHOLE_DAY_FLAG_THRESH']:
        print(f'{setting} = {eval(setting)}')
good_zeros_per_eo_spectrum = (0, 2)
suspect_zeros_per_eo_spectrum = (0, 8)
auto_power_good = (5.0, 30.0)
auto_power_suspect = (1.0, 60.0)
auto_slope_good = (-0.4, 0.4)
auto_slope_suspect = (-0.6, 0.6)
auto_rfi_good = (0, 1.5)
auto_rfi_suspect = (0, 2.0)
auto_shape_good = (0, 0.1)
auto_shape_suspect = (0, 0.2)
BAD_XENGINE_ZCUT = 10.0
RFI_DPSS_HALFWIDTH = 3e-07
RFI_NSIG = 4.0
FREQ_FILTER_SCALE = 5.0
TIME_FILTER_SCALE = 450.0
EIGENVAL_CUTOFF = 1e-12
FM_freq_range = [87500000.0, 108000000.0]
MAX_SOLAR_ALT = 0.0
SMOOTHED_ABS_Z_THRESH = 10.0
WHOLE_DAY_FLAG_THRESH = 0.5

Classify Antennas and Find RFI Per-File¶

In [6]:
cache = {}
In [7]:
def auto_bl_zscores(data, flag_array, prior_class=None, cache={}):
    '''This function computes z-score arrays for each delay-filtered autocorrelation, normalized by the expected noise. 
    Flagged times/channels for the whole array are given 0 weight in filtering and are np.nan in the z-score.'''
    zscores = {}
    int_time = 24 * 3600 * np.median(np.diff(data.times))
    chan_res = np.median(np.diff(data.freqs))
    int_count = int(int_time * chan_res)    
    for bl in data:
        if utils.split_bl(bl)[0] != utils.split_bl(bl)[1]:
            continue
        if prior_class is not None:
            if (prior_class[utils.split_bl(bl)[0]] == 'bad'):
                continue
        wgts = np.array(np.logical_not(flag_array), dtype=np.float64)
        model, _, _ = dspec.fourier_filter(data.freqs, data[bl], wgts, filter_centers=[0], filter_half_widths=[RFI_DPSS_HALFWIDTH], mode='dpss_solve',
                                            suppression_factors=[1e-9], eigenval_cutoff=[1e-9], cache=cache)
        res = data[bl] - model
        sigma = np.abs(model) / np.sqrt(int_count / 2)
        zscores[bl] = res / sigma    
        zscores[bl][flag_array] = np.nan

    return zscores
In [8]:
def rfi_from_avg_autos(data, auto_bls_to_use, prior_flags=None, nsig=RFI_NSIG):
    '''Average together all baselines in auto_bls_to_use, then find an RFI mask by looking for outliers after DPSS filtering.'''
    
    # Compute int_count for all unflagged autocorrelations averaged together
    int_time = 24 * 3600 * np.median(np.diff(data.times))
    chan_res = np.median(np.diff(data.freqs))
    int_count = int(int_time * chan_res) * len(auto_bls_to_use)
    avg_auto = {(-1, -1, 'ee'): np.mean([data[bl] for bl in auto_bls_to_use], axis=0)}
    
    # Flag RFI first with channel differences and then with DPSS
    antenna_flags, _ = xrfi.flag_autos(avg_auto, int_count=int_count, nsig=(RFI_NSIG * 5))
    if prior_flags is not None:
        antenna_flags[(-1, -1, 'ee')] = prior_flags
    _, rfi_flags = xrfi.flag_autos(avg_auto, int_count=int_count, flag_method='dpss_flagger',
                                   flags=antenna_flags, freqs=data.freqs, filter_centers=[0],
                                   filter_half_widths=[RFI_DPSS_HALFWIDTH], eigenval_cutoff=[1e-9], nsig=nsig)

    return rfi_flags
In [9]:
def classify_autos_and_preliminary_rfi(auto_sum_file, auto_diff_file):
    
    hd_sum = io.HERADataFastReader(auto_sum_file)
    sum_autos, _, _ = hd_sum.read(read_flags=False, read_nsamples=False)
    hd_diff = io.HERADataFastReader(auto_diff_file)
    diff_autos, _, _ = hd_diff.read(read_flags=False, read_nsamples=False, dtype=np.complex64, fix_autos_func=np.real)
    ants = sorted(set([ant for bl in hd_sum.bls for ant in utils.split_bl(bl)]))
    
    # check for zeros in the evens or odds
    zeros_class = ant_class.even_odd_zeros_checker(sum_autos, diff_autos, good=good_zeros_per_eo_spectrum, suspect=suspect_zeros_per_eo_spectrum)

    # check for problems in auto power or slope
    auto_power_class = ant_class.auto_power_checker(sum_autos, good=auto_power_good, suspect=auto_power_suspect)
    auto_slope_class = ant_class.auto_slope_checker(sum_autos, good=auto_slope_good, suspect=auto_slope_suspect, edge_cut=100, filt_size=17)   
    
    overall_class = zeros_class + auto_power_class + auto_slope_class
    if (len(overall_class.good_ants) + len(overall_class.suspect_ants)) == 0:
        return overall_class, np.ones((len(sum_autos.times), len(sum_autos.freqs)), dtype=bool)
    
    # find initial set of flags
    antenna_flags, array_flags = xrfi.flag_autos(sum_autos, flag_method="channel_diff_flagger", nsig=RFI_NSIG * 5, 
                                                 antenna_class=overall_class, flag_broadcast_thresh=.5)
    for key in antenna_flags:
        antenna_flags[key] = array_flags
    _, array_flags = xrfi.flag_autos(sum_autos, freqs=sum_autos.freqs, flag_method="dpss_flagger",
                                     nsig=RFI_NSIG, antenna_class=overall_class,
                                     filter_centers=[0], filter_half_widths=[RFI_DPSS_HALFWIDTH],
                                     eigenval_cutoff=[1e-9], flags=antenna_flags, mode='dpss_matrix', 
                                     cache=cache, flag_broadcast_thresh=.5)        

    # check for non-noiselike x-engine diffs
    xengine_diff_class = ant_class.non_noiselike_diff_by_xengine_checker(sum_autos, diff_autos, flag_waterfall=array_flags, 
                                                                 antenna_class=overall_class, 
                                                                 xengine_chans=96, bad_xengine_zcut=BAD_XENGINE_ZCUT)

    # update overall_class and return if all antennas are bad
    overall_class += xengine_diff_class
    if (len(overall_class.good_ants) + len(overall_class.suspect_ants)) == 0:
        return overall_class, np.ones((len(sum_autos.times), len(sum_autos.freqs)), dtype=bool)
    
    
    # Iteratively develop RFI mask, excess RFI classification, and autocorrelation shape classification
    stage = 1
    rfi_flags = np.array(array_flags)
    prior_end_states = set()
    while True:
        # compute DPSS-filtered z-scores with current array-wide RFI mask
        zscores = auto_bl_zscores(sum_autos, rfi_flags, cache=cache,
                                  prior_class=(auto_power_class + auto_slope_class + zeros_class + xengine_diff_class))
        rms = {bl: np.nanmean(zscores[bl]**2)**.5 if np.any(np.isfinite(zscores[bl])) else np.inf for bl in zscores}

        # figure out which autos to use for finding new set of flags
        candidate_autos = [bl for bl in sum_autos if overall_class[utils.split_bl(bl)[0]] != 'bad']
        if stage == 1:
            # use best half of the unflagged antennas
            med_rms = np.nanmedian([rms[bl] for bl in candidate_autos])
            autos_to_use = [bl for bl in candidate_autos if rms[bl] <= med_rms]
        elif stage == 2:
            # use all unflagged antennas which are auto RFI good, or the best half, whichever is larger
            med_rms = np.nanmedian([rms[bl] for bl in candidate_autos])
            best_half_autos = [bl for bl in candidate_autos if rms[bl] <= med_rms]
            good_autos = [bl for bl in candidate_autos if (overall_class[utils.split_bl(bl)[0]] != 'bad')
                          and (auto_rfi_class[utils.split_bl(bl)[0]] == 'good')]
            autos_to_use = (best_half_autos if len(best_half_autos) > len(good_autos) else good_autos)
        elif stage == 3:
            # use all unflagged antennas which are auto RFI good or suspect
            autos_to_use = [bl for bl in candidate_autos if (overall_class[utils.split_bl(bl)[0]] != 'bad')]

        # compute new RFI flags
        rfi_flags = rfi_from_avg_autos(sum_autos, autos_to_use)

        # perform auto shape and RFI classification
        overall_class = auto_power_class + auto_slope_class + zeros_class + xengine_diff_class
        auto_rfi_class = ant_class.antenna_bounds_checker(rms, good=auto_rfi_good, suspect=auto_rfi_suspect, bad=(0, np.inf))
        overall_class += auto_rfi_class
        auto_shape_class = ant_class.auto_shape_checker(sum_autos, good=auto_shape_good, suspect=auto_shape_suspect,
                                                        flag_spectrum=np.sum(rfi_flags, axis=0).astype(bool), 
                                                        antenna_class=overall_class)
        overall_class += auto_shape_class

        # check if the whole array is now flagged
        if (len(overall_class.good_ants) + len(overall_class.suspect_ants)) == 0:
            break
        
        # check for convergence by seeing whether we've previously gotten to this number of flagged antennas and channels
        if stage == 3:
            if (len(overall_class.bad_ants), np.sum(rfi_flags)) in prior_end_states:
                break
            prior_end_states.add((len(overall_class.bad_ants), np.sum(rfi_flags)))
        else:
            stage += 1    
    
    # return all flagged if all antennnas are bad, otherwise return overall class and rfi_flags
    if (len(overall_class.good_ants) + len(overall_class.suspect_ants)) == 0:
        return overall_class, np.ones((len(sum_autos.times), len(sum_autos.freqs)), dtype=bool)
    else:
        return overall_class, rfi_flags
        
In [10]:
classifications = {}
preliminary_rfi_flags = {}
with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    for auto_sum_file, auto_diff_file in list(zip(auto_sums, auto_diffs)):
        classifications[auto_sum_file], preliminary_rfi_flags[auto_sum_file] = classify_autos_and_preliminary_rfi(auto_sum_file, auto_diff_file)
In [11]:
all_ants = set([ant for auto_sum_file in classifications for ant in classifications[auto_sum_file]])
ant_flag_fracs = {ant: 0 for ant in all_ants}
for classification in classifications.values():
    for ant in classification.bad_ants:
        ant_flag_fracs[ant] += 1
ant_flag_fracs = {ant: ant_flag_fracs[ant] / len(classifications) for ant in all_ants}
In [12]:
def flag_frac_array_plot():
    fig, axes = plt.subplots(1, 2, figsize=(14, 8), dpi=100, gridspec_kw={'width_ratios': [2, 1]})

    def flag_frac_panel(ax, antnums, radius=7, legend=False):

        ang_dict = {'Jee': (225, 405), 'Jnn': (45, 225)}
        hd_sum = io.HERADataFastReader(auto_sums[-1])
        xpos = np.array([hd_sum.antpos[ant[0]][0] for ant in all_ants if ant[0] in antnums])
        ypos = np.array([hd_sum.antpos[ant[0]][1] for ant in all_ants if ant[0] in antnums])
        scatter = ax.scatter(xpos, ypos, c='w', s=0)
        for ant in all_ants:
            antnum, pol = ant
            if antnum in antnums:
                ax.add_artist(matplotlib.patches.Wedge(tuple(hd_sum.antpos[antnum][0:2]), radius, *ang_dict[pol], color='grey'))
                flag_frac = ant_flag_fracs[ant]
                if flag_frac > .05:
                    ax.add_artist(matplotlib.patches.Wedge(tuple(hd_sum.antpos[antnum][0:2]), radius * np.sqrt(flag_frac), *ang_dict[pol], color='r'))
                ax.text(hd_sum.antpos[antnum][0], hd_sum.antpos[antnum][1], str(antnum), color='w',  va='center', ha='center', zorder=100)

        ax.axis('equal')
        ax.set_xlim([np.min(xpos) - radius * 2, np.max(xpos) + radius * 2])
        ax.set_ylim([np.min(ypos) - radius * 2, np.max(ypos) + radius * 2])
        ax.set_xlabel("East-West Position (meters)", size=12)
        ax.set_ylabel("North-South Position (meters)", size=12)

        if legend:
            legend_objs = []
            legend_labels = []

            legend_objs.append(matplotlib.lines.Line2D([0], [0], marker='o', color='w', markeredgecolor='grey', markerfacecolor='grey', markersize=15))
            unflagged_nights = lambda pol: np.sum([1 - ant_flag_fracs[ant] for ant in all_ants if ant[-1] == pol])
            legend_labels.append((' \u2571\n').join([f'{unflagged_nights(pol):.1f} unflagged {pol[-1]}-polarized\nantenna-nights.' for pol in ['Jee', 'Jnn']]))

            legend_objs.append(matplotlib.lines.Line2D([0], [0], marker='o', color='w', markeredgecolor='red', markerfacecolor='red', markersize=15))
            unflagged_nights = lambda pol: np.sum([ant_flag_fracs[ant] for ant in all_ants if ant[-1] == pol])
            legend_labels.append((' \u2571\n').join([f'{unflagged_nights(pol):.1f} flagged {pol[-1]}-polarized\nantenna-nights.' for pol in ['Jee', 'Jnn']]))        
            ax.legend(legend_objs, legend_labels, ncol=1, fontsize=12)

    flag_frac_panel(axes[0], sorted(set([ant[0] for ant in all_ants if ant[0] < 320])), radius=7)
    flag_frac_panel(axes[1], sorted(set([ant[0] for ant in all_ants if ant[0] >= 320])), radius=50, legend=True)
    plt.tight_layout()

Figure 1: Preliminary Array Flag Fraction Summary¶

Per-antenna flagging fraction of data based purely on metrics that only use autocorrelations. This is likely an underestimate of flags, since it ignores low correlation, cross-polarized antennas, and high redcal $\chi^2$, among other factors.

In [13]:
flag_frac_array_plot()
Ignoring fixed y limits to fulfill fixed data aspect with adjustable data limits.
Ignoring fixed y limits to fulfill fixed data aspect with adjustable data limits.
Ignoring fixed y limits to fulfill fixed data aspect with adjustable data limits.
Ignoring fixed y limits to fulfill fixed data aspect with adjustable data limits.
No description has been provided for this image

Load and Average Unflagged Autocorrelations¶

In [14]:
min_flag_frac = np.min(list(ant_flag_fracs.values()))
least_flagged_ants = sorted([ant for ant in all_ants if ant_flag_fracs[ant] == min_flag_frac])
least_flagged_autos = [utils.join_bl(ant, ant) for ant in least_flagged_ants]
In [15]:
avg_autos = {}
times = []
for auto_sum_file in auto_sums:
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
    hd_sum = io.HERADataFastReader(auto_sum_file)
    sum_autos, _, _ = hd_sum.read(bls=least_flagged_autos, read_flags=False, read_nsamples=False)
    avg_autos[auto_sum_file] = np.mean([sum_autos[bl] for bl in sum_autos], axis=0)
    times.extend(hd_sum.times)
times = np.array(times)
freqs = hd_sum.freqs
In [16]:
flags = np.vstack([flag for flag in preliminary_rfi_flags.values()])
avg_auto = np.vstack([auto for auto in avg_autos.values()])
In [17]:
def flag_FM(flags, freqs, freq_range=[87.5e6, 108e6]):
    '''Apply flags to all frequencies within freq_range (in Hz).'''
    flags[:, np.logical_and(freqs >= freq_range[0], freqs <= freq_range[1])] = True 
    
def flag_sun(flags, times, max_solar_alt=0):
    '''Apply flags to all times where the solar altitude is greater than max_solar_alt (in degrees).'''
    solar_altitudes_degrees = utils.get_sun_alt(times)
    flags[solar_altitudes_degrees >= max_solar_alt, :] = True    
In [18]:
flag_FM(flags, freqs, freq_range=FM_freq_range)
solar_flags = np.zeros_like(flags)
flag_sun(solar_flags, times, max_solar_alt=MAX_SOLAR_ALT)
flags |= solar_flags
all_flagged_times = np.all(flags, axis=1)

DPSS Filter Average Autocorrlations¶

In [19]:
def predict_auto_noise(auto, dt, df, nsamples=1):
    '''Predict noise on an (antenna-averaged) autocorrelation. The product of Delta t and Delta f
    must be unitless. For N autocorrelations averaged together, use nsamples=N.'''
    int_count = int(dt * df) * nsamples
    return np.abs(auto) / np.sqrt(int_count / 2)

# Figure out noise and weights
dt = np.median(np.diff(times))
int_time = 24 * 3600 * dt
chan_res = np.median(np.diff(freqs))
noise = predict_auto_noise(avg_auto, int_time, chan_res, nsamples=len(least_flagged_ants))
wgts = np.where(flags, 0, noise**-2)
In [20]:
# enable handling of missing file gaps
time_grid = np.arange(times[0], times[-1] + dt, dt)
time_indices = {i: np.searchsorted(time_grid, t) for i, t in enumerate(times)}
avg_auto_on_grid = np.zeros((len(time_grid), len(freqs)), dtype=float)
wgts_on_grid = np.zeros((len(time_grid), len(freqs)), dtype=float)
flags_on_grid = np.ones((len(time_grid), len(freqs)), dtype=bool)
for i in time_indices:
    avg_auto_on_grid[time_indices[i], :] = avg_auto[i, :]
    wgts_on_grid[time_indices[i], :] = wgts[i, :]
    flags_on_grid[time_indices[i], :] = flags[i, :]
all_flagged_times_on_grid = np.all(flags_on_grid, axis=1)    
In [21]:
time_filters, freq_filters = dpss_filters(freqs=freqs, # Hz
                                          times=time_grid, # JD
                                          freq_scale=FREQ_FILTER_SCALE,
                                          time_scale=TIME_FILTER_SCALE,
                                          eigenval_cutoff=EIGENVAL_CUTOFF)
In [22]:
model, fit_info = solve_2D_DPSS(avg_auto_on_grid, wgts_on_grid, time_filters, freq_filters, method='lu_solve')
In [23]:
noise_model = predict_auto_noise(np.abs(model), int_time, chan_res, nsamples=len(least_flagged_ants))
zscore = ((avg_auto_on_grid - model) / noise_model).real
In [24]:
def plot_z_score(flags, zscore):
    plt.figure(figsize=(14,10), dpi=100)
    extent = [freqs[0]/1e6, freqs[-1]/1e6, times[-1] - int(times[0]), times[0] - int(times[0])]
    plt.imshow(np.where(flags, np.nan, zscore.real), aspect='auto', cmap='bwr', interpolation='none', vmin=-SMOOTHED_ABS_Z_THRESH, vmax=SMOOTHED_ABS_Z_THRESH, extent=extent)
    plt.colorbar(location='top', label='z score', extend='both')
    plt.xlabel('Frequency (MHz)')
    plt.ylabel(f'JD - {int(times[0])}')
    plt.tight_layout()

Figure 2: z-Score of DPSS-Filtered, Averaged Good Autocorrelation and Initial Flags¶

This plot shows the z-score of a DPSS-filtered, deeply averaged autocorrelation, where the noise is inferred from the integration time, channel width, and DPSS model. DPSS was performed using the per-file RFI flagging analogous to that used in the file_calibration notebook, which is generally insensitive to broadband RFI.

In [25]:
plot_z_score(flags_on_grid, zscore)
No description has been provided for this image

Find Bad Time Ranges¶

In [26]:
# Propose flags for night times when there's too much temporal structure due to, e.g. lightning
sigma = TIME_FILTER_SCALE / int_time
metric = np.nanmean(np.where(flags_on_grid, np.nan, np.abs(zscore)), axis=1)
kernel = np.exp(-np.arange(-len(metric) // 2, len(metric) // 2 + 1)**2 / 2 / sigma**2)
kernel /= np.sum(kernel)
convolved_metric = np.full_like(metric, np.nan)
convolved_metric[~all_flagged_times_on_grid] = convolve(metric[~all_flagged_times_on_grid], kernel, mode='reflect')
apriori_time_flags = np.ones_like(metric, dtype=bool)
apriori_time_flags[~all_flagged_times_on_grid] = metric_convolution_flagging(metric[~all_flagged_times_on_grid], convolved_metric[~all_flagged_times_on_grid] >= SMOOTHED_ABS_Z_THRESH, 
                                                                             [0, SMOOTHED_ABS_Z_THRESH], sigma=(TIME_FILTER_SCALE / int_time), max_flag_gap=0)
In [27]:
# Flag whole day if too much of the day is flagged
apriori_flag_frac = np.mean(apriori_time_flags[~all_flagged_times_on_grid])
if apriori_flag_frac > WHOLE_DAY_FLAG_THRESH:
    print(f'A priori time flag fraction of {apriori_flag_frac:.2%} is greater than {WHOLE_DAY_FLAG_THRESH:.2%}... Flagging whole day.')
    apriori_time_flags = np.ones_like(apriori_time_flags)
In [28]:
def apriori_flag_plot():
    plt.figure(figsize=(14, 6))
    plt.semilogy(time_grid - int(time_grid[0]), np.nanmean(np.where(flags_on_grid, np.nan, np.abs(zscore)), axis=1), label='Average |z| Over Frequency')
    plt.semilogy(time_grid - int(time_grid[0]), convolved_metric, label=f'Convolved on {TIME_FILTER_SCALE}-second timescale')
    plt.axhline(SMOOTHED_ABS_Z_THRESH, color='k', ls='--', label='Threshold on Convolved |z|')
    for i, apf in enumerate(true_stretches(apriori_time_flags)):
        plt.axvspan((time_grid - int(time_grid[0]))[apf.start], (time_grid - int(time_grid[0]))[apf.stop - 1], color='r', zorder=0, alpha=.3, 
                    label=('Proposed A Priori Flags' if i == 0 else None))
    plt.legend()

    plt.xlabel(f'JD - {int(time_grid[0])}')
    plt.ylabel('Frequency Averaged |z-score|')
    plt.tight_layout()    

Figure 3: Proposed A Priori Time Flags Based on Frequency-Averaged and Convolved z-Score Magnitude¶

This plot shows the average (over frequency) magnitude of z-scores as a function of time. This metric is smoothed to pick out ranges of times where the DPSS residual reveals persistent temporal structure. Flags due to the sun being above the horizon are also shown. The unflagged range of times is required to be contiguous.

In [29]:
apriori_flag_plot()
No description has been provided for this image

Write a priori flags to a yaml¶

Also writing as a priori flags channels that are 100% flagged and antennas that are 100% flagged.

In [30]:
dt = int_time / 3600 / 24 / 2
out_yml_str = 'JD_flags: ' + str([[float(time_grid[apf][0] - dt / 2), float(time_grid[apf][-1] + dt / 2)] for apf in true_stretches(apriori_time_flags)])
out_yml_str += '\n\nfreq_flags: ' + str([[float(freqs[apf][0] - chan_res / 2), float(freqs[apf][-1] + chan_res / 2)] for apf in true_stretches(np.all(flags, axis=0))])
convert_ant = lambda ant: [int(ant[0]), str(ant[1])] if isinstance(ant, (list, tuple)) else int(ant)
out_yml_str += '\n\nex_ants: ' + str(sorted([convert_ant(ant) for ant in all_ants if ant_flag_fracs[ant] == 1])).replace("'", "").replace('(', '[').replace(')', ']')
out_yml_str += '\n\nall_ant: ' + str(sorted([convert_ant(ant) for ant in all_ants])).replace("'", "").replace('(', '[').replace(')', ']')
out_yml_str += f'\n\njd_range: [{float(times.min())}, {float(times.max())}]'
out_yml_str += f'\n\nfreq_range: [{float(freqs.min())}, {float(freqs.max())}]'

print(f'Writing the following to {out_yaml_file}\n' + '-' * (25 + len(out_yaml_file)))
print(out_yml_str)
with open(out_yaml_file, 'w') as outfile:
    outfile.writelines(out_yml_str)
Writing the following to /mnt/sn1/data1/2460805/2460805_apriori_flags.yaml
--------------------------------------------------------------------------
JD_flags: []

freq_flags: [[62362670.8984375, 62606811.5234375], [69931030.2734375, 70053100.5859375], [87509155.2734375, 108016967.7734375], [124862670.8984375, 125106811.5234375], [129989624.0234375, 130111694.3359375], [137924194.3359375, 138046264.6484375], [141464233.3984375, 141586303.7109375], [141708374.0234375, 141830444.3359375], [142074584.9609375, 142318725.5859375], [143783569.3359375, 144027709.9609375], [147445678.7109375, 147567749.0234375], [149887084.9609375, 150009155.2734375], [154281616.2109375, 154403686.5234375], [175155639.6484375, 175277709.9609375], [187362670.8984375, 187606811.5234375], [189926147.4609375, 190048217.7734375], [191146850.5859375, 191268920.8984375], [197128295.8984375, 197372436.5234375], [198104858.3984375, 198226928.7109375], [201766967.7734375, 201889038.0859375], [204940795.8984375, 205062866.2109375], [208480834.9609375, 208724975.5859375], [209945678.7109375, 210067749.0234375], [220687866.2109375, 220809936.5234375], [223129272.4609375, 223373413.0859375], [229110717.7734375, 229354858.3984375]]

ex_ants: [[4, Jee], [8, Jee], [15, Jee], [15, Jnn], [18, Jee], [18, Jnn], [20, Jnn], [21, Jee], [27, Jee], [27, Jnn], [28, Jee], [30, Jee], [30, Jnn], [32, Jnn], [33, Jnn], [34, Jee], [40, Jnn], [46, Jee], [62, Jee], [72, Jee], [76, Jee], [76, Jnn], [78, Jee], [81, Jnn], [82, Jnn], [89, Jnn], [90, Jnn], [99, Jnn], [104, Jnn], [105, Jee], [107, Jee], [107, Jnn], [108, Jee], [109, Jnn], [115, Jee], [120, Jee], [121, Jee], [132, Jee], [132, Jnn], [135, Jee], [161, Jnn], [167, Jnn], [170, Jee], [176, Jnn], [180, Jnn], [182, Jee], [199, Jnn], [200, Jee], [200, Jnn], [201, Jee], [201, Jnn], [202, Jee], [202, Jnn], [203, Jee], [203, Jnn], [209, Jnn], [212, Jnn], [218, Jnn], [219, Jee], [219, Jnn], [220, Jee], [220, Jnn], [221, Jee], [221, Jnn], [222, Jee], [222, Jnn], [227, Jnn], [233, Jnn], [236, Jee], [236, Jnn], [237, Jee], [237, Jnn], [238, Jee], [238, Jnn], [239, Jee], [239, Jnn], [251, Jee], [254, Jee], [254, Jnn], [255, Jnn], [257, Jnn], [268, Jee], [268, Jnn], [271, Jee], [271, Jnn], [273, Jnn], [286, Jnn], [321, Jee], [329, Jee], [332, Jee], [332, Jnn], [333, Jee], [340, Jnn]]

all_ant: [[3, Jee], [3, Jnn], [4, Jee], [4, Jnn], [5, Jee], [5, Jnn], [7, Jee], [7, Jnn], [8, Jee], [8, Jnn], [9, Jee], [9, Jnn], [10, Jee], [10, Jnn], [15, Jee], [15, Jnn], [16, Jee], [16, Jnn], [17, Jee], [17, Jnn], [18, Jee], [18, Jnn], [19, Jee], [19, Jnn], [20, Jee], [20, Jnn], [21, Jee], [21, Jnn], [22, Jee], [22, Jnn], [27, Jee], [27, Jnn], [28, Jee], [28, Jnn], [29, Jee], [29, Jnn], [30, Jee], [30, Jnn], [31, Jee], [31, Jnn], [32, Jee], [32, Jnn], [33, Jee], [33, Jnn], [34, Jee], [34, Jnn], [35, Jee], [35, Jnn], [36, Jee], [36, Jnn], [37, Jee], [37, Jnn], [38, Jee], [38, Jnn], [40, Jee], [40, Jnn], [41, Jee], [41, Jnn], [42, Jee], [42, Jnn], [43, Jee], [43, Jnn], [44, Jee], [44, Jnn], [45, Jee], [45, Jnn], [46, Jee], [46, Jnn], [47, Jee], [47, Jnn], [48, Jee], [48, Jnn], [49, Jee], [49, Jnn], [50, Jee], [50, Jnn], [51, Jee], [51, Jnn], [52, Jee], [52, Jnn], [53, Jee], [53, Jnn], [54, Jee], [54, Jnn], [55, Jee], [55, Jnn], [56, Jee], [56, Jnn], [57, Jee], [57, Jnn], [58, Jee], [58, Jnn], [59, Jee], [59, Jnn], [60, Jee], [60, Jnn], [61, Jee], [61, Jnn], [62, Jee], [62, Jnn], [63, Jee], [63, Jnn], [64, Jee], [64, Jnn], [65, Jee], [65, Jnn], [66, Jee], [66, Jnn], [67, Jee], [67, Jnn], [68, Jee], [68, Jnn], [69, Jee], [69, Jnn], [70, Jee], [70, Jnn], [71, Jee], [71, Jnn], [72, Jee], [72, Jnn], [73, Jee], [73, Jnn], [74, Jee], [74, Jnn], [75, Jee], [75, Jnn], [76, Jee], [76, Jnn], [77, Jee], [77, Jnn], [78, Jee], [78, Jnn], [79, Jee], [79, Jnn], [80, Jee], [80, Jnn], [81, Jee], [81, Jnn], [82, Jee], [82, Jnn], [83, Jee], [83, Jnn], [84, Jee], [84, Jnn], [85, Jee], [85, Jnn], [86, Jee], [86, Jnn], [87, Jee], [87, Jnn], [88, Jee], [88, Jnn], [89, Jee], [89, Jnn], [90, Jee], [90, Jnn], [91, Jee], [91, Jnn], [92, Jee], [92, Jnn], [93, Jee], [93, Jnn], [94, Jee], [94, Jnn], [95, Jee], [95, Jnn], [96, Jee], [96, Jnn], [97, Jee], [97, Jnn], [98, Jee], [98, Jnn], [99, Jee], [99, Jnn], [100, Jee], [100, Jnn], [101, Jee], [101, Jnn], [102, Jee], [102, Jnn], [103, Jee], [103, Jnn], [104, Jee], [104, Jnn], [105, Jee], [105, Jnn], [106, Jee], [106, Jnn], [107, Jee], [107, Jnn], [108, Jee], [108, Jnn], [109, Jee], [109, Jnn], [110, Jee], [110, Jnn], [111, Jee], [111, Jnn], [112, Jee], [112, Jnn], [113, Jee], [113, Jnn], [114, Jee], [114, Jnn], [115, Jee], [115, Jnn], [116, Jee], [116, Jnn], [117, Jee], [117, Jnn], [118, Jee], [118, Jnn], [119, Jee], [119, Jnn], [120, Jee], [120, Jnn], [121, Jee], [121, Jnn], [122, Jee], [122, Jnn], [123, Jee], [123, Jnn], [124, Jee], [124, Jnn], [125, Jee], [125, Jnn], [126, Jee], [126, Jnn], [127, Jee], [127, Jnn], [128, Jee], [128, Jnn], [129, Jee], [129, Jnn], [130, Jee], [130, Jnn], [131, Jee], [131, Jnn], [132, Jee], [132, Jnn], [133, Jee], [133, Jnn], [134, Jee], [134, Jnn], [135, Jee], [135, Jnn], [136, Jee], [136, Jnn], [137, Jee], [137, Jnn], [138, Jee], [138, Jnn], [139, Jee], [139, Jnn], [140, Jee], [140, Jnn], [141, Jee], [141, Jnn], [142, Jee], [142, Jnn], [143, Jee], [143, Jnn], [144, Jee], [144, Jnn], [145, Jee], [145, Jnn], [146, Jee], [146, Jnn], [147, Jee], [147, Jnn], [148, Jee], [148, Jnn], [149, Jee], [149, Jnn], [150, Jee], [150, Jnn], [151, Jee], [151, Jnn], [152, Jee], [152, Jnn], [153, Jee], [153, Jnn], [154, Jee], [154, Jnn], [155, Jee], [155, Jnn], [156, Jee], [156, Jnn], [157, Jee], [157, Jnn], [158, Jee], [158, Jnn], [159, Jee], [159, Jnn], [160, Jee], [160, Jnn], [161, Jee], [161, Jnn], [162, Jee], [162, Jnn], [163, Jee], [163, Jnn], [164, Jee], [164, Jnn], [165, Jee], [165, Jnn], [166, Jee], [166, Jnn], [167, Jee], [167, Jnn], [168, Jee], [168, Jnn], [169, Jee], [169, Jnn], [170, Jee], [170, Jnn], [171, Jee], [171, Jnn], [172, Jee], [172, Jnn], [173, Jee], [173, Jnn], [174, Jee], [174, Jnn], [175, Jee], [175, Jnn], [176, Jee], [176, Jnn], [177, Jee], [177, Jnn], [178, Jee], [178, Jnn], [179, Jee], [179, Jnn], [180, Jee], [180, Jnn], [181, Jee], [181, Jnn], [182, Jee], [182, Jnn], [183, Jee], [183, Jnn], [184, Jee], [184, Jnn], [185, Jee], [185, Jnn], [186, Jee], [186, Jnn], [187, Jee], [187, Jnn], [188, Jee], [188, Jnn], [189, Jee], [189, Jnn], [190, Jee], [190, Jnn], [191, Jee], [191, Jnn], [192, Jee], [192, Jnn], [193, Jee], [193, Jnn], [194, Jee], [194, Jnn], [195, Jee], [195, Jnn], [196, Jee], [196, Jnn], [197, Jee], [197, Jnn], [198, Jee], [198, Jnn], [199, Jee], [199, Jnn], [200, Jee], [200, Jnn], [201, Jee], [201, Jnn], [202, Jee], [202, Jnn], [203, Jee], [203, Jnn], [204, Jee], [204, Jnn], [205, Jee], [205, Jnn], [206, Jee], [206, Jnn], [207, Jee], [207, Jnn], [208, Jee], [208, Jnn], [209, Jee], [209, Jnn], [210, Jee], [210, Jnn], [211, Jee], [211, Jnn], [212, Jee], [212, Jnn], [213, Jee], [213, Jnn], [214, Jee], [214, Jnn], [215, Jee], [215, Jnn], [216, Jee], [216, Jnn], [217, Jee], [217, Jnn], [218, Jee], [218, Jnn], [219, Jee], [219, Jnn], [220, Jee], [220, Jnn], [221, Jee], [221, Jnn], [222, Jee], [222, Jnn], [223, Jee], [223, Jnn], [224, Jee], [224, Jnn], [225, Jee], [225, Jnn], [226, Jee], [226, Jnn], [227, Jee], [227, Jnn], [228, Jee], [228, Jnn], [229, Jee], [229, Jnn], [231, Jee], [231, Jnn], [232, Jee], [232, Jnn], [233, Jee], [233, Jnn], [234, Jee], [234, Jnn], [235, Jee], [235, Jnn], [236, Jee], [236, Jnn], [237, Jee], [237, Jnn], [238, Jee], [238, Jnn], [239, Jee], [239, Jnn], [240, Jee], [240, Jnn], [241, Jee], [241, Jnn], [242, Jee], [242, Jnn], [243, Jee], [243, Jnn], [244, Jee], [244, Jnn], [245, Jee], [245, Jnn], [246, Jee], [246, Jnn], [250, Jee], [250, Jnn], [251, Jee], [251, Jnn], [252, Jee], [252, Jnn], [253, Jee], [253, Jnn], [254, Jee], [254, Jnn], [255, Jee], [255, Jnn], [256, Jee], [256, Jnn], [257, Jee], [257, Jnn], [261, Jee], [261, Jnn], [262, Jee], [262, Jnn], [266, Jee], [266, Jnn], [267, Jee], [267, Jnn], [268, Jee], [268, Jnn], [269, Jee], [269, Jnn], [270, Jee], [270, Jnn], [271, Jee], [271, Jnn], [272, Jee], [272, Jnn], [273, Jee], [273, Jnn], [281, Jee], [281, Jnn], [282, Jee], [282, Jnn], [283, Jee], [283, Jnn], [284, Jee], [284, Jnn], [285, Jee], [285, Jnn], [286, Jee], [286, Jnn], [287, Jee], [287, Jnn], [295, Jee], [295, Jnn], [320, Jee], [320, Jnn], [321, Jee], [321, Jnn], [322, Jee], [322, Jnn], [323, Jee], [323, Jnn], [324, Jee], [324, Jnn], [325, Jee], [325, Jnn], [326, Jee], [326, Jnn], [327, Jee], [327, Jnn], [328, Jee], [328, Jnn], [329, Jee], [329, Jnn], [331, Jee], [331, Jnn], [332, Jee], [332, Jnn], [333, Jee], [333, Jnn], [336, Jee], [336, Jnn], [340, Jee], [340, Jnn]]

jd_range: [2460805.2107580905, 2460805.249569384]

freq_range: [46920776.3671875, 234298706.0546875]
In [31]:
# check output yaml file
flagged_indices = metrics_io.read_a_priori_int_flags(out_yaml_file, time_grid)
for i, apf in enumerate(apriori_time_flags):
    if i in flagged_indices:
        assert apf
    else:
        assert not apf
flagged_chans = metrics_io.read_a_priori_chan_flags(out_yaml_file, freqs=freqs)
for chan in range(len(freqs)):
    if chan in flagged_chans:
        assert np.all(flags[:, chan])
    else:
        assert not np.all(flags[:, chan])
flagged_ants = metrics_io.read_a_priori_ant_flags(out_yaml_file)
for ant in all_ants:
    if ant_flag_fracs[ant] == 1:
        assert ant in flagged_ants
    else:
        assert ant not in flagged_ants

Metadata¶

In [32]:
for repo in ['hera_cal', 'hera_qm', 'hera_filters', 'hera_notebook_templates', 'pyuvdata']:
    exec(f'from {repo} import __version__')
    print(f'{repo}: {__version__}')
hera_cal: 3.7.1.dev45+g4a0c6f1
hera_qm: 2.2.1.dev2+ga535e9e
hera_filters: 0.1.6.dev9+gf165ec1
hera_notebook_templates: 0.1.dev989+gee0995d
pyuvdata: 3.1.3
In [33]:
print(f'Finished execution in {(time.time() - tstart) / 60:.2f} minutes.')
Finished execution in 71.95 minutes.