Second Round of Full Day RFI Flagging¶

by Josh Dillon, last updated October 13, 2024

This notebook synthesizes information from individual delay_filtered_average_zscore notebooks to find low-level RFI and flag it. That notebook takes smooth_calibrated data, redundantly averages it, performs a high-pass delay filter, and then incoherently averages across baselines, creating a per-polarization z-score. This notebook then takes that whole night of z-scores and finds a new set of flags to both add to the smooth_cal files, which are updated in place, and to write down as new UVFlag waterfall-type .h5 files.

Here's a set of links to skip to particular figures and tables:

• Figure 1: Waterfall of Maximum z-Score of Either Polarization Before Round 2 Flagging¶

• Figure 2: Histogram of z-scores¶

• Figure 3: Waterfall of Maximum z-Score of Either Polarization After Round 2 Flagging¶

• Figure 4: Spectra of Time-Averaged z-Scores¶

• Figure 5: Summary of Flags Before and After Round 2 Flagging¶

In [1]:
import time
# Record the wall-clock time at which the notebook started running
# (presumably used to report total runtime in a later cell that is not visible here).
tstart = time.time()
In [2]:
import os
os.environ['HDF5_USE_FILE_LOCKING'] = 'FALSE'
import h5py
import hdf5plugin  # REQUIRED to have the compression plugins available
import numpy as np
import glob
import matplotlib.pyplot as plt
import matplotlib
import copy
import warnings
from pyuvdata import UVFlag, UVCal
from hera_cal import utils
from hera_qm import xrfi
from hera_qm.time_series_metrics import true_stretches
from hera_filters import dspec

from IPython.display import display, HTML
%matplotlib inline
display(HTML("<style>.container { width:100% !important; }</style>"))
_ = np.seterr(all='ignore')  # get rid of red warnings
%config InlineBackend.figure_format = 'retina'
In [3]:
# get input data file names
SUM_FILE = os.environ.get("SUM_FILE", None)
# SUM_FILE = '/lustre/aoc/projects/hera/h6c-analysis/IDR2/2459861/zen.2459861.25297.sum.uvh5'
SUM_SUFFIX = os.environ.get("SUM_SUFFIX", 'sum.uvh5')
if SUM_FILE is None:
    # every glob and output path below is derived from SUM_FILE, so fail here with an
    # actionable message rather than an AttributeError from calling .split on None
    raise ValueError("SUM_FILE is not set: export SUM_FILE (or assign it above) to the path of one of the night's sum files.")

# get input and output suffixes
SMOOTH_CAL_SUFFIX = os.environ.get("SMOOTH_CAL_SUFFIX", 'sum.smooth.calfits')
ZSCORE_SUFFIX = os.environ.get("ZSCORE_SUFFIX", 'sum.red_avg_zscore.h5')
FLAG_WATERFALL2_SUFFIX = os.environ.get("FLAG_WATERFALL2_SUFFIX", 'sum.flag_waterfall_round_2.h5')
OUT_YAML_SUFFIX = os.environ.get("OUT_YAML_SUFFIX", '_aposteriori_flags.yaml')
OUT_YAML_DIR = os.environ.get("OUT_YAML_DIR", None)

# build globs: drop the last three dot-separated fields of SUM_FILE and wildcard the field
# that precedes the suffix, so that the glob matches every file sharing SUM_FILE's prefix
sum_glob = '.'.join(SUM_FILE.split('.')[:-3]) + '.*.' + SUM_SUFFIX
cal_files_glob = sum_glob.replace(SUM_SUFFIX, SMOOTH_CAL_SUFFIX)
zscore_glob = sum_glob.replace(SUM_SUFFIX, ZSCORE_SUFFIX)

# build out yaml file, named after the fourth-from-last dot-separated field of SUM_FILE
# (e.g. 2459861 in the example path above) and defaulting to SUM_FILE's directory
if OUT_YAML_DIR is None:
    OUT_YAML_DIR = os.path.dirname(SUM_FILE)
out_yaml_file = os.path.join(OUT_YAML_DIR, SUM_FILE.split('.')[-4] + OUT_YAML_SUFFIX)

# get flagging parameters
Z_THRESH = float(os.environ.get("Z_THRESH", 4))
WS_Z_THRESH = float(os.environ.get("WS_Z_THRESH", 2))
AVG_Z_THRESH = float(os.environ.get("AVG_Z_THRESH", 1))
MAX_FREQ_FLAG_FRAC = float(os.environ.get("MAX_FREQ_FLAG_FRAC", .25))
MAX_TIME_FLAG_FRAC = float(os.environ.get("MAX_TIME_FLAG_FRAC", .1))
AVG_SPECTRUM_FILTER_DELAY = float(os.environ.get("AVG_SPECTRUM_FILTER_DELAY", 250)) # in ns
EIGENVAL_CUTOFF = float(os.environ.get("EIGENVAL_CUTOFF", 1e-12))
TIME_AVG_DELAY_FILT_SNR_THRESH = float(os.environ.get("TIME_AVG_DELAY_FILT_SNR_THRESH", 4.0))
TIME_AVG_DELAY_FILT_SNR_DYNAMIC_RANGE = float(os.environ.get("TIME_AVG_DELAY_FILT_SNR_DYNAMIC_RANGE", 1.5))

# echo the settings actually in use; a namespace lookup replaces eval() since only a name is needed
for setting in ['Z_THRESH', 'WS_Z_THRESH', 'AVG_Z_THRESH', 'MAX_FREQ_FLAG_FRAC', 'MAX_TIME_FLAG_FRAC', 'AVG_SPECTRUM_FILTER_DELAY',
                'EIGENVAL_CUTOFF', 'TIME_AVG_DELAY_FILT_SNR_THRESH', 'TIME_AVG_DELAY_FILT_SNR_DYNAMIC_RANGE']:
    print(f'{setting} = {globals()[setting]}')
Z_THRESH = 4.0
WS_Z_THRESH = 2.0
AVG_Z_THRESH = 1.0
MAX_FREQ_FLAG_FRAC = 0.25
MAX_TIME_FLAG_FRAC = 0.1
AVG_SPECTRUM_FILTER_DELAY = 250.0
EIGENVAL_CUTOFF = 1e-12
TIME_AVG_DELAY_FILT_SNR_THRESH = 4.0
TIME_AVG_DELAY_FILT_SNR_DYNAMIC_RANGE = 1.5

Load z-scores¶

In [4]:
# load z-scores
zscore_files = sorted(glob.glob(zscore_glob))
if not zscore_files:
    # report the pattern that matched nothing instead of an IndexError from zscore_files[0]
    raise FileNotFoundError(f'No *.{ZSCORE_SUFFIX} files found matching {zscore_glob}.')
print(f'Found {len(zscore_files)} *.{ZSCORE_SUFFIX} files starting with {zscore_files[0]}.')
# read all files into a single UVFlag object
# NOTE(review): use_future_array_shapes is believed to be deprecated in recent pyuvdata releases — confirm against the pinned version
uvf = UVFlag(zscore_files, use_future_array_shapes=True)
Found 1571 *.sum.red_avg_zscore.h5 files starting with /mnt/sn1/data1/2460793/zen.2460793.21079.sum.red_avg_zscore.h5.
In [5]:
# get calibration solution files
cal_files = sorted(glob.glob(cal_files_glob))
if not cal_files:
    # report the pattern that matched nothing instead of an IndexError from cal_files[0]
    raise FileNotFoundError(f'No *.{SMOOTH_CAL_SUFFIX} files found matching {cal_files_glob}.')
print(f'Found {len(cal_files)} *.{SMOOTH_CAL_SUFFIX} files starting with {cal_files[0]}.')
Found 1571 *.sum.smooth.calfits files starting with /mnt/sn1/data1/2460793/zen.2460793.21079.sum.smooth.calfits.
In [6]:
# the flags found below are written back to the calibration files in sorted order, walking
# along the time axis of the z-scores, so the two file lists must at least be the same length
assert len(zscore_files) == len(cal_files), \
    f'Found {len(zscore_files)} z-score files but {len(cal_files)} calibration files.'
In [7]:
# Pull one z-score waterfall per polarization out of the UVFlag metric array, then remove a
# single per-polarization offset (the median over all finite samples) to account for biases
# created by filtering.
zscore = {}
for pol in ['ee', 'nn']:
    pol_num = utils.polstr2num(pol, x_orientation=uvf.x_orientation)
    # index of this polarization along the last axis of metric_array
    pol_ind = np.argwhere(uvf.polarization_array == pol_num)[0][0]
    waterfall = uvf.metric_array[:, :, pol_ind]
    zscore[pol] = waterfall - np.nanmedian(waterfall)
In [8]:
# Frequency and time axes of the z-score waterfalls.
# NOTE(review): presumably in Hz and JD respectively, judging by the "/ 1e6" -> MHz and
# "JD - ..." axis labels used when plotting — confirm against UVFlag.
freqs = uvf.freq_array
times = uvf.time_array
In [9]:
# imshow extent as [left, right, bottom, top]: frequencies divided by 1e6 on the x-axis and, on the
# y-axis, times relative to the integer part of the first time. The last time is the bottom edge
# and the first time the top edge, so time increases downward in the waterfalls.
extent = [freqs[0] / 1e6, freqs[-1] / 1e6, times[-1] - int(times[0]), times[0] - int(times[0])]
In [10]:
def plot_max_z_score(zscore, flags=None, vmin=-5, vmax=5):
    '''Plot a waterfall of the larger of the ee and nn z-scores at each time and frequency.

    Arguments:
        zscore: dictionary with keys 'ee' and 'nn' mapping to equally-shaped 2D arrays of z-scores.
        flags: boolean array of samples to blank out. If None, blank wherever either
            polarization's z-score is not finite.
        vmin, vmax: color scale limits.

    Uses the module-level `extent` and `times` for the image extent and the y-axis label.
    '''
    if flags is None:
        not_finite = ~np.isfinite(list(zscore.values()))
        flags = np.any(not_finite, axis=0)

    # max over polarizations (ignoring NaNs), then blank out flagged samples
    max_z = np.nanmax([zscore['ee'], zscore['nn']], axis=0)
    to_show = np.where(flags, np.nan, max_z)

    plt.figure(figsize=(14,10), dpi=100)
    plt.imshow(to_show, aspect='auto', cmap='coolwarm', interpolation='none',
               vmin=vmin, vmax=vmax, extent=extent)
    plt.colorbar(location='top', label='Max z-score of either polarization', extend='both', aspect=40, pad=.02)
    plt.xlabel('Frequency (MHz)')
    plt.ylabel(f'JD - {int(times[0])}')
    plt.tight_layout()

Figure 1: Waterfall of Maximum z-Score of Either Polarization Before Round 2 Flagging¶

Shows the worse of the two results from delay_filtered_average_zscore from either polarization. Dips near flagged channels are expected, due to overfitting of noise. Positive-going excursions are problematic and likely evidence of RFI.

In [11]:
# Figure 1: no flags are passed, so only samples with a non-finite z-score in either polarization are blanked
plot_max_z_score(zscore)
All-NaN axis encountered
No description has been provided for this image
In [12]:
def plot_histogram():
    '''Plot normalized histograms of the ee and nn z-scores (module-level `zscore`) on a log scale,
    along with a unit Gaussian for comparison and vertical lines at WS_Z_THRESH and Z_THRESH.'''
    bin_edges = np.arange(-50, 100, .1)
    unit_gaussian = (2*np.pi)**-.5 * np.exp(-bin_edges**2 / 2)

    plt.figure(figsize=(14,4), dpi=100)
    densities = {}
    for pol in ['ee', 'nn']:
        # plt.hist returns (densities, bin edges, patches); only the densities are needed later
        densities[pol] = plt.hist(np.ravel(zscore[pol]), bins=bin_edges, density=True,
                                  label=f'{pol}-polarized z-scores', alpha=.5)[0]
    plt.plot(bin_edges, unit_gaussian, 'k:', label='Gaussian approximate\nnoise-only distribution')
    plt.axvline(WS_Z_THRESH, c='r', ls='--', label='Watershed z-score')
    plt.axvline(Z_THRESH, c='r', ls='-', label='Threshold z-score')
    plt.yscale('log')

    # set y-limits from the non-empty bins only, since empty bins cannot be shown on a log axis
    nonzero_densities = np.concatenate([dens[dens > 0] for dens in densities.values()])
    plt.ylim(np.min(nonzero_densities) / 2, np.max(nonzero_densities) * 2)
    plt.xlim([-50, 100])

    plt.legend()
    plt.xlabel('z-score')
    plt.ylabel('Density')
    plt.tight_layout()

Figure 2: Histogram of z-scores¶

Shows a comparison of the histogram of z-scores in this file (one per polarization) to a Gaussian approximation of what one might expect from thermal noise. Without filtering, the actual distribution is a weighted sum of Rayleigh distributions. Filtering further complicates this. To make the z-scores more reliable, a single per-polarization median is subtracted from each waterfall, which allows us to flag low-level outliers with more confidence. Any points beyond the solid red line are flagged. Any points neighboring a flag beyond the dashed red line are also flagged. Finally, flagging is performed for low-level outliers in whole times or channels.

In [13]:
# Figure 2: per-polarization z-score histograms with the flagging thresholds overlaid
plot_histogram()
No description has been provided for this image

Perform flagging¶

In [14]:
def iteratively_flag_on_averaged_zscore(flags, zscore, avg_func=np.nanmean, avg_z_thresh=AVG_Z_THRESH, verbose=True):
    '''Flag whole integrations or channels based on average z-score. This is done
    iteratively to prevent bad times affecting channel averages or vice versa.

    Arguments:
        flags: 2D boolean array (axis 0 is integrations, axis 1 is channels). Modified in place.
        zscore: 2D array of z-scores with the same shape as flags.
        avg_func: function accepting an array and an `axis` keyword, used to average
            the unflagged z-scores along each axis (flagged samples are passed in as NaN).
        avg_z_thresh: averaged z-score at or above which a whole integration or channel is flagged.
        verbose: if True, print how many integrations and channels were flagged.

    Returns:
        None. The result is communicated through the in-place update of flags.
    '''
    flagged_chan_count = 0
    flagged_int_count = 0
    while True:
        masked_zscore = np.where(flags, np.nan, zscore)
        zspec = avg_func(masked_zscore, axis=0)
        ztseries = avg_func(masked_zscore, axis=1)
        max_zspec = np.nanmax(zspec)
        max_ztseries = np.nanmax(ztseries)

        # Stop once nothing reaches the threshold. This is written so that NaN maxima (which is
        # what np.nanmax gives when everything is flagged) also stop the loop; testing "max < thresh"
        # is False for NaN and would leave the loop spinning forever without flagging anything.
        if not ((max_zspec >= avg_z_thresh) or (max_ztseries >= avg_z_thresh)):
            break

        # flag along whichever axis has the worst outlier, taking everything that is
        # at least as bad as the worst outlier along the other axis
        if max_zspec >= max_ztseries:
            bad_chans = (zspec >= max_ztseries) & (zspec >= avg_z_thresh)
            n_newly_flagged = np.sum(bad_chans)
            flagged_chan_count += n_newly_flagged
            flags[:, bad_chans] = True
        else:
            bad_ints = (ztseries >= max_zspec) & (ztseries >= avg_z_thresh)
            n_newly_flagged = np.sum(bad_ints)
            flagged_int_count += n_newly_flagged
            flags[bad_ints, :] = True

        # safety net: if nothing was selected the next iteration would be identical, so stop
        if n_newly_flagged == 0:
            break

    if verbose:
        print(f'\tFlagging an additional {flagged_int_count} integrations and {flagged_chan_count} channels.')

def impose_max_chan_flag_frac(flags, max_flag_frac=MAX_FREQ_FLAG_FRAC, verbose=True):
    '''Completely flag every channel whose flagged fraction is at least max_flag_frac, where
    the fraction is computed using only the times that are not already completely flagged.

    Arguments:
        flags: 2D boolean array (axis 0 is times, axis 1 is channels). Modified in place.
        max_flag_frac: flagged fraction at or above which the whole channel is flagged.
        verbose: if True, print the number of channels selected minus the number of
            channels that were already completely flagged.
    '''
    not_fully_flagged_times = np.logical_not(flags.all(axis=1))
    chan_flag_frac = flags[not_fully_flagged_times, :].mean(axis=0)
    chans_to_flag = chan_flag_frac >= max_flag_frac
    if verbose:
        n_new = chans_to_flag.sum() - flags.all(axis=0).sum()
        print(f'\tFlagging {n_new} channels previously flagged {max_flag_frac:.2%} or more.')
    flags[:, chans_to_flag] = True
        
def impose_max_time_flag_frac(flags, max_flag_frac=MAX_TIME_FLAG_FRAC, verbose=True):
    '''Completely flag every time whose flagged fraction is at least max_flag_frac, where
    the fraction is computed using only the channels that are not already completely flagged.

    Arguments:
        flags: 2D boolean array (axis 0 is times, axis 1 is channels). Modified in place.
        max_flag_frac: flagged fraction at or above which the whole time is flagged.
        verbose: if True, print the number of times selected minus the number of
            times that were already completely flagged.
    '''
    not_fully_flagged_chans = np.logical_not(flags.all(axis=0))
    time_flag_frac = flags[:, not_fully_flagged_chans].mean(axis=1)
    times_to_flag = time_flag_frac >= max_flag_frac
    if verbose:
        n_new = times_to_flag.sum() - flags.all(axis=1).sum()
        print(f'\tFlagging {n_new} times previously flagged {max_flag_frac:.2%} or more.')
    flags[times_to_flag, :] = True

def time_avg_zscore_dly_filt_SNRs(flags, filter_delay=AVG_SPECTRUM_FILTER_DELAY, eigenval_cutoff=EIGENVAL_CUTOFF):
    """Produces SNRs after time-averaging z-scores and delay filtering, accounting for flagging's effect on the filter.

    Operates on the module-level `zscore` dictionary and `freqs` array.

    Arguments:
        flags: 2D boolean array (axis 0 is times, axis 1 is channels). Not modified.
        filter_delay: half-width of the delay filter in ns (it is divided by 1e9 before being
            handed to the DPSS routines).
        eigenval_cutoff: passed to the DPSS routines as both the eigenvalue cutoff and the
            suppression factor.

    Returns:
        Dictionary mapping each polarization in `zscore` to a per-channel array of filtered,
        leverage-corrected SNRs. Channels outside the two bands, or with no unflagged data, are NaN.

    Raises:
        IndexError: if no completely flagged stretch of channels contains the channel closest to 100 MHz.
    """
    # figure out high and low band based on FM gap at 100 MHz
    flagged_stretches = true_stretches(np.all(flags, axis=0))
    FM_chan = np.argmin(np.abs(freqs - 100e6))
    FM_gap = [fs for fs in flagged_stretches if fs.start <= FM_chan < fs.stop][0]
    # each band is trimmed so that it excludes completely flagged stretches at the edges of the full band
    low_band = slice((0 if flagged_stretches[0].start != 0 else flagged_stretches[0].stop), FM_gap.start)
    high_band = slice(FM_gap.stop, (len(freqs) if flagged_stretches[-1].stop != len(freqs) else flagged_stretches[-1].start))
    bands = [low_band, high_band]

    filter_half_width = filter_delay / 1e9  # ns -> s

    # expected noise on an average of N unit-variance samples is N**-.5; same for both polarizations
    noise_prediction = 1.0 / np.sum(~flags, axis=0)**.5

    filt_SNR = {}
    for pol in zscore:
        # calculate timeavg_SNR and filter
        timeavg_SNR = np.nanmean(np.where(flags, np.nan, zscore[pol] / noise_prediction), axis=0)
        wgts = np.where(np.isfinite(timeavg_SNR), 1, 0)
        model = np.zeros_like(timeavg_SNR)
        for band in bands:
            model[band], _, _ = dspec.fourier_filter(freqs[band], np.where(np.isfinite(timeavg_SNR[band]), timeavg_SNR[band], 0),
                                                     wgts[band], [0], [filter_half_width], mode="dpss_solve",
                                                     eigenval_cutoff=[eigenval_cutoff], suppression_factors=[eigenval_cutoff])
        filt_SNR[pol] = timeavg_SNR - model

        # correct for impact of filter: the weighted least-squares fit absorbs some of the noise, so
        # each residual is rescaled using the leverage (diagonal of the hat matrix) of its channel
        correction_factors = np.ones_like(wgts) * np.nan
        for band in bands:
            X = dspec.dpss_operator(freqs[band], [0], filter_half_widths=[filter_half_width], eigenval_cutoff=[eigenval_cutoff])[0]
            W = wgts[band]
            leverage = np.diag(X @ np.linalg.pinv(np.dot(X.T * W, X)) @ (X.T * W))
            correction_factors[band] = np.where(leverage > 0, (1 - leverage)**.5, np.nan) # because the underlying data should be gaussian
        filt_SNR[pol] /= correction_factors

    return filt_SNR

def iteratively_flag_on_delay_filtered_time_avg_zscore(flags, thresh=TIME_AVG_DELAY_FILT_SNR_THRESH, dynamic_range=TIME_AVG_DELAY_FILT_SNR_DYNAMIC_RANGE,
                                                       filter_delay=AVG_SPECTRUM_FILTER_DELAY, eigenval_cutoff=EIGENVAL_CUTOFF):
    """Flag whole channels based on their outlierness after delay-filtered time-averaged zscores.
    This is done iteratively since the delay filter can be unduly influenced by large outliers.

    Arguments:
        flags: 2D boolean array (axis 0 is times, axis 1 is channels). Modified in place.
        thresh: iteration continues for as long as the largest filtered SNR of either
            polarization is at least this large.
        dynamic_range: on each iteration, only channels whose SNR exceeds the larger of thresh and
            (largest SNR / dynamic_range) are flagged, so the worst outliers are removed first.
        filter_delay, eigenval_cutoff: forwarded to time_avg_zscore_dly_filt_SNRs.

    Returns:
        None. The result is communicated through the in-place update of flags.
    """
    while True:
        # recompute the filtered SNRs on every pass, since each new set of flags changes the filter
        filt_SNR = time_avg_zscore_dly_filt_SNRs(flags, filter_delay=filter_delay, eigenval_cutoff=eigenval_cutoff)
        largest_SNR = np.nanmax(list(filt_SNR.values()))
        # written so that a NaN largest_SNR (no finite SNRs left) also ends the loop
        if not (largest_SNR >= thresh):
            break

        cut = np.max([thresh, largest_SNR / dynamic_range])
        chans_to_flag = np.any([filt_SNR[pol] > cut for pol in filt_SNR], axis=0)
        # if nothing strictly exceeds the cut (e.g. largest_SNR == thresh), another pass
        # would be identical to this one, so stop rather than loop forever
        if not np.any(chans_to_flag):
            break
        flags[:, chans_to_flag] = True
In [15]:
def _flag_whole_times_and_chans_until_converged(flags, avg_func):
    '''Alternate between polarizations, flagging whole times/channels on averaged z-scores and then
    imposing the maximum flagged fractions, until a full pass adds no new flags. Modifies flags in place.'''
    while True:
        nflags = np.sum(flags)
        for pol in ['ee', 'nn']:
            iteratively_flag_on_averaged_zscore(flags, zscore[pol], avg_func=avg_func, avg_z_thresh=AVG_Z_THRESH, verbose=True)
            impose_max_chan_flag_frac(flags, max_flag_frac=MAX_FREQ_FLAG_FRAC, verbose=True)
            impose_max_time_flag_frac(flags, max_flag_frac=MAX_TIME_FLAG_FRAC, verbose=True)
        if np.sum(flags) == nflags:
            break


def _watershed_flag_until_converged(flags):
    '''Watershed flag on both polarizations at WS_Z_THRESH until a full pass adds no new flags.
    Modifies flags in place.'''
    while True:
        nflags = np.sum(flags)
        for pol in ['ee', 'nn']:
            flags |= xrfi._ws_flag_waterfall(zscore[pol], flags, WS_Z_THRESH)
        if np.sum(flags) == nflags:
            break


# start from everything that is not finite in either polarization
flags = np.any(~np.isfinite(list(zscore.values())), axis=0)
print(f'{np.mean(flags):.3%} of waterfall flagged to start.')

# flag whole integrations or channels using outliers in median
_flag_whole_times_and_chans_until_converged(flags, np.nanmedian)
print(f'{np.mean(flags):.3%} of waterfall flagged after flagging whole times and channels with median z > {AVG_Z_THRESH}.')

# flag largest outliers
for pol in ['ee', 'nn']:
    flags |= (zscore[pol] > Z_THRESH)
print(f'{np.mean(flags):.3%} of waterfall flagged after flagging z > {Z_THRESH} outliers.')

# watershed flagging
_watershed_flag_until_converged(flags)
print(f'{np.mean(flags):.3%} of waterfall flagged after watershed flagging on z > {WS_Z_THRESH} neighbors of prior flags.')

# flag whole integrations or channels using outliers in mean
_flag_whole_times_and_chans_until_converged(flags, np.nanmean)
print(f'{np.mean(flags):.3%} of waterfall flagged after flagging whole times and channels with average z > {AVG_Z_THRESH}.')

# flag channels based on delay filter
iteratively_flag_on_delay_filtered_time_avg_zscore(flags, thresh=TIME_AVG_DELAY_FILT_SNR_THRESH, dynamic_range=TIME_AVG_DELAY_FILT_SNR_DYNAMIC_RANGE,
                                                   filter_delay=AVG_SPECTRUM_FILTER_DELAY, eigenval_cutoff=EIGENVAL_CUTOFF)
print(f'{np.mean(flags):.3%} of waterfall flagged after flagging channels that are {TIME_AVG_DELAY_FILT_SNR_THRESH}σ outliers after delay filtering the time average.')

# watershed flagging again
_watershed_flag_until_converged(flags)
print(f'{np.mean(flags):.3%} of waterfall flagged after another round of watershed flagging on z > {WS_Z_THRESH} neighbors of prior flags.')
22.191% of waterfall flagged to start.
All-NaN slice encountered
	Flagging an additional 1117 integrations and 11 channels.
	Flagging 1 channels previously flagged 25.00% or more.
	Flagging 0 times previously flagged 10.00% or more.
	Flagging an additional 555 integrations and 0 channels.
	Flagging 0 channels previously flagged 25.00% or more.
	Flagging 0 times previously flagged 10.00% or more.
	Flagging an additional 0 integrations and 6 channels.
	Flagging 0 channels previously flagged 25.00% or more.
	Flagging 0 times previously flagged 10.00% or more.
	Flagging an additional 0 integrations and 0 channels.
	Flagging 0 channels previously flagged 25.00% or more.
	Flagging 0 times previously flagged 10.00% or more.
	Flagging an additional 0 integrations and 0 channels.
	Flagging 0 channels previously flagged 25.00% or more.
	Flagging 0 times previously flagged 10.00% or more.
	Flagging an additional 0 integrations and 0 channels.
	Flagging 0 channels previously flagged 25.00% or more.
	Flagging 0 times previously flagged 10.00% or more.
66.062% of waterfall flagged after flagging whole times and channels with median z > 1.0.
66.797% of waterfall flagged after flagging z > 4.0 outliers.
69.027% of waterfall flagged after watershed flagging on z > 2.0 neighbors of prior flags.
	Flagging an additional 0 integrations and 0 channels.
	Flagging 92 channels previously flagged 25.00% or more.
	Flagging 384 times previously flagged 10.00% or more.
Mean of empty slice
Mean of empty slice
	Flagging an additional 0 integrations and 0 channels.
	Flagging 5 channels previously flagged 25.00% or more.
	Flagging 0 times previously flagged 10.00% or more.
	Flagging an additional 0 integrations and 0 channels.
	Flagging 0 channels previously flagged 25.00% or more.
	Flagging 0 times previously flagged 10.00% or more.
	Flagging an additional 0 integrations and 0 channels.
	Flagging 0 channels previously flagged 25.00% or more.
	Flagging 0 times previously flagged 10.00% or more.
78.579% of waterfall flagged after flagging whole times and channels with average z > 1.0.
Mean of empty slice
Casting complex values to real discards the imaginary part
Casting complex values to real discards the imaginary part
84.684% of flagging channels that are 4.0σ outliers after delay filtering the time average.
84.812% of waterfall flagged after another round of watershed flagging on z > 2.0 neighbors of prior flags.

Show results of flagging¶

Figure 3: Waterfall of Maximum z-Score of Either Polarization After Round 2 Flagging¶

The same as Figure 1, but after the flagging performed in this notebook.

In [16]:
# Figure 3: same waterfall as Figure 1, now blanking everything in the flags produced above
plot_max_z_score(zscore, flags=flags)
All-NaN axis encountered
No description has been provided for this image
In [17]:
def zscore_spectra(ylim=(-3, 3), flags=flags):
    '''Plot the time-averaged z-score spectrum of each polarization (module-level `zscore`),
    both without and with the round 2 flags applied, in two vertically stacked panels.

    Arguments:
        ylim: (lower, upper) limits of the shared y-axis.
        flags: 2D boolean array of samples to exclude from the "after" average. The default
            is the module-level flags array as bound when this function was defined.
    '''
    fig, axes = plt.subplots(2, 1, figsize=(14,6), dpi=100, sharex=True, sharey=True, gridspec_kw={'hspace': 0})
    for ax, pol in zip(axes, ['ee', 'nn']):
        # "before" only ignores NaNs already present in the z-scores; "after" also excludes the new flags
        ax.plot(freqs / 1e6, np.nanmean(zscore[pol], axis=0),'r', label=f'{pol}-Polarization Before Round 2 Flagging', lw=.5)
        ax.plot(freqs / 1e6, np.nanmean(np.where(flags, np.nan, zscore[pol]), axis=0), label=f'{pol}-Polarization After Round 2 Flagging')
        ax.legend(loc='lower right')
        ax.set_ylabel('Time-Averaged Z-Score\n(Excluding Flags)')
        ax.set_ylim(ylim)
    axes[1].set_xlabel('Frequency (MHz)')
    plt.tight_layout()

Figure 4: Spectra of Time-Averaged z-Scores¶

The average along the time axis of Figures 1 and 3 (though now separated per-polarization). This plot is useful for showing channels with repeated low-level RFI.

In [18]:
# Figure 4: time-averaged z-score spectra before and after round 2 flagging, using the default y-limits and flags
zscore_spectra()
Mean of empty slice
Mean of empty slice
No description has been provided for this image
In [19]:
def summarize_flagging(flags=flags):
    '''Plot a three-color waterfall distinguishing unflagged samples, samples that were already
    flagged coming into this notebook (non-finite z-score in either polarization), and samples
    newly flagged here.

    Arguments:
        flags: 2D boolean array of all flags. The default is the module-level flags array
            as bound when this function was defined.
    '''
    plt.figure(figsize=(14,10), dpi=100)
    # black for unflagged, followed by the first two Set2 colors; plt.get_cmap is used because
    # matplotlib.cm.get_cmap is deprecated as of Matplotlib 3.7
    set2_colors = tuple(plt.get_cmap("Set2").colors[0:2])
    cmap = matplotlib.colors.ListedColormap(((0, 0, 0),) + set2_colors)
    # 0 = unflagged, 1 = previously flagged, 2 = flagged here
    previously_flagged = np.any(~np.isfinite(list(zscore.values())), axis=0)
    plt.imshow(np.where(previously_flagged, 1, np.where(flags, 2, 0)),
               aspect='auto', cmap=cmap, interpolation='none', extent=extent)
    # color limits put each integer category at the center of its color
    plt.clim([-.5, 2.5])
    cbar = plt.colorbar(location='top', aspect=40, pad=.02)
    cbar.set_ticks([0, 1, 2])
    cbar.set_ticklabels(['Unflagged', 'Previously Flagged', 'Flagged Here Using Delayed Filtered z-Scores'])
    plt.xlabel('Frequency (MHz)')
    plt.ylabel(f'JD - {int(times[0])}')
    plt.tight_layout()

Figure 5: Summary of Flags Before and After Round 2 Flagging¶

This plot shows which times and frequencies were flagged before and after this notebook. It is directly comparable to Figure 5 of the first round full_day_rfi notebook.

In [20]:
# Figure 5: map of unflagged, previously flagged, and newly flagged samples, using the default flags
summarize_flagging()
The get_cmap function was deprecated in Matplotlib 3.7 and will be removed in 3.11. Use ``matplotlib.colormaps[name]`` or ``matplotlib.colormaps.get_cmap()`` or ``pyplot.get_cmap()`` instead.
No description has been provided for this image

Save results¶

In [21]:
# Text appended to the history of every file written below: a note naming this notebook followed by
# the output of `conda env export` (run via os.popen), set between two 65-character rules of "=".
add_to_history = 'by full_day_rfi_round_2 notebook with the following environment:\n' + '=' * 65 + '\n' + os.popen('conda env export').read() + '=' * 65
In [22]:
# Write the new flags back into every calibration file (overwriting it) and save a matching
# waterfall flag file next to it, while keeping track of which antennas are flagged in every file.

# index along the time axis of flags at which the current file's integrations begin
tind = 0
# (antnum, antpol) pairs that have been completely flagged in every file read so far
always_flagged_ants = set()
# (antnum, antpol) pairs that have had unflagged data in at least one file read so far
ever_unflagged_ants = set()
for cal_file in cal_files:
    with warnings.catch_warnings():
        # NOTE: this suppresses every warning raised while reading and writing the files
        warnings.simplefilter("ignore")    
        
        # update cal_file
        uvc = UVCal()
        uvc.read(cal_file, use_future_array_shapes=True)
        # take this file's rows of flags, transpose them to (freq, time), and add singleton first and
        # last axes so that they broadcast against the flag array
        # (assumes flag_array is (Nants, Nfreqs, Ntimes, Njones) — TODO confirm for the pyuvdata version in use)
        uvc.flag_array |= (flags[tind:tind + len(uvc.time_array), :].T)[None, :, :, None]
        uvc.history += 'Modified ' + add_to_history
        # clobber=True: the input calibration file is overwritten in place
        uvc.write_calfits(cal_file, clobber=True)
        
        # keep track of flagged antennas
        # (this is evaluated after the new flags have been OR'ed in above)
        for antnum in uvc.ant_array:
            for antpol in ['Jee', 'Jnn']:
                if np.all(uvc.get_flags(antnum, antpol)):
                    # only counts as always flagged if it has never been seen with unflagged data
                    if (antnum, antpol) not in ever_unflagged_ants:
                        always_flagged_ants.add((antnum, antpol))
                else:
                    ever_unflagged_ants.add((antnum, antpol))
                    always_flagged_ants.discard((antnum, antpol))
                

        # Create new flag object
        uvf_out = UVFlag(uvc, waterfall=True, mode='flag')
        # the trailing singleton axis lets the (time, freq) flags broadcast over the last axis of the waterfall
        uvf_out.flag_array |= flags[tind:tind + len(uvc.time_array), :, None]
        uvf_out.history += 'Produced ' + add_to_history
        # the output name is the calibration file's name with its suffix swapped; existing files are overwritten
        uvf_out.write(cal_file.replace(SMOOTH_CAL_SUFFIX, FLAG_WATERFALL2_SUFFIX), clobber=True)
        
        # increment time index
        tind += len(uvc.time_array)

print(f'Saved {len(cal_files)} *.{FLAG_WATERFALL2_SUFFIX} files starting with {cal_files[0].replace(SMOOTH_CAL_SUFFIX, FLAG_WATERFALL2_SUFFIX)}.')
Saved 1571 *.sum.flag_waterfall_round_2.h5 files starting with /mnt/sn1/data1/2460793/zen.2460793.21079.sum.flag_waterfall_round_2.h5.
In [23]:
# write summary of entirely flagged times/freqs/ants to yaml
all_flagged_times = np.all(flags, axis=1)
all_flagged_freqs = np.all(flags, axis=0)
all_flagged_ants = [(int(ant[0]), ant[1]) for ant in sorted(always_flagged_ants)]

dt = np.median(np.diff(times))
out_yml_str = 'JD_flags: ' + str([[float(times[flag_stretch][0] - dt / 2), float(times[flag_stretch][-1] + dt / 2)] 
                                  for flag_stretch in true_stretches(all_flagged_times)])
df = np.median(np.diff(freqs))
out_yml_str += '\n\nfreq_flags: ' + str([[float(freqs[flag_stretch][0] - df / 2), float(freqs[flag_stretch][-1] + df / 2)] 
                                         for flag_stretch in true_stretches(all_flagged_freqs)])
out_yml_str += '\n\nex_ants: ' + str(all_flagged_ants).replace("'", "").replace('(', '[').replace(')', ']')

print(f'Writing the following to {out_yaml_file}\n' + '-' * (25 + len(out_yaml_file)))
print(out_yml_str)
with open(out_yaml_file, 'w') as outfile:
    outfile.writelines(out_yml_str)
Writing the following to /mnt/sn1/data1/2460793/2460793_aposteriori_flags.yaml
------------------------------------------------------------------------------
JD_flags: [[2460793.2106799623, 2460793.248484622], [2460793.249043863, 2460793.25038604], [2460793.250945281, 2460793.2513926732], [2460793.251728217, 2460793.2522874577], [2460793.252399306, 2460793.2530703945], [2460793.253405939, 2460793.2538533313], [2460793.2540770276, 2460793.254300724], [2460793.2549718125, 2460793.255419205], [2460793.2557547493, 2460793.2558665974], [2460793.2581035593, 2460793.2582154074], [2460793.2583272555, 2460793.2585509517], [2460793.258774648, 2460793.258998344], [2460793.2602286735, 2460793.2604523697], [2460793.2634722684, 2460793.2636959646], [2460793.263919661, 2460793.264143357], [2460793.264926294, 2460793.26514999], [2460793.265821079, 2460793.265932927], [2460793.266044775, 2460793.2662684713], [2460793.2663803194, 2460793.2666040156], [2460793.267051408, 2460793.2673869524], [2460793.267834345, 2460793.268058041], [2460793.268505433, 2460793.2690646737], [2460793.26928837, 2460793.269512066], [2460793.270183155, 2460793.2706305473], [2460793.27107794, 2460793.271301636], [2460793.272196421, 2460793.272420117], [2460793.273091206, 2460793.273314902], [2460793.2735385983, 2460793.2736504464], [2460793.2742096866, 2460793.274433383], [2460793.274657079, 2460793.2748807753], [2460793.2752163196, 2460793.275440016], [2460793.275551864, 2460793.27577556], [2460793.2758874083, 2460793.2761111045], [2460793.2762229526, 2460793.276446649], [2460793.2771177376, 2460793.277341434], [2460793.27756513, 2460793.2779006744], [2460793.2787954593, 2460793.2790191555], [2460793.2793547, 2460793.279578396], [2460793.280696877, 2460793.280920573], [2460793.2821509023, 2460793.2823745986], [2460793.2824864467, 2460793.282710143], [2460793.282821991, 2460793.2833812316], [2460793.283604928, 2460793.283828624], [2460793.285394497, 2460793.2856181934], [2460793.286065586, 2460793.2865129784], [2460793.286960371, 2460793.2874077633], [2460793.2875196114, 2460793.288078852], [2460793.288302548, 2460793.2887499407], [2460793.289197333, 
2460793.2894210294], [2460793.289868422, 2460793.2903158143], [2460793.2905395105, 2460793.290986903], [2460793.291434295, 2460793.2916579912], [2460793.2921053837, 2460793.29232908], [2460793.2927764724, 2460793.293223865], [2460793.293447561, 2460793.29411865], [2460793.294342346, 2460793.294454194], [2460793.2945660423, 2460793.2947897385], [2460793.295460827, 2460793.2956845234], [2460793.296355612, 2460793.296691156], [2460793.2970267003, 2460793.297474093], [2460793.297809637, 2460793.2983688777], [2460793.298592574, 2460793.2990399664], [2460793.2992636627, 2460793.299711055], [2460793.2999347514, 2460793.300382144], [2460793.3008295363, 2460793.3012769287], [2460793.301500625, 2460793.3019480174], [2460793.3021717137, 2460793.3026191057], [2460793.303066498, 2460793.3035138906], [2460793.303961283, 2460793.3046323718], [2460793.304856068, 2460793.3053034605], [2460793.3055271567, 2460793.305974549], [2460793.3060863973, 2460793.306869334], [2460793.3070930303, 2460793.307764119], [2460793.307987815, 2460793.3086589035], [2460793.3088825997, 2460793.309329992], [2460793.3096655365, 2460793.311119562], [2460793.3113432582, 2460793.3117906507], [2460793.312014347, 2460793.3124617394], [2460793.3126854356, 2460793.3133565243], [2460793.31358022, 2460793.3140276126], [2460793.3141394607, 2460793.3157053343], [2460793.316488271, 2460793.3189489297], [2460793.3191726254, 2460793.3193963217], [2460793.319843714, 2460793.3202911066], [2460793.320514803, 2460793.3211858915], [2460793.3214095877, 2460793.322416221], [2460793.3228636133, 2460793.3229754614], [2460793.3233110057, 2460793.324429487], [2460793.324876879, 2460793.3253242713], [2460793.326107208, 2460793.3263309044], [2460793.3265546006, 2460793.326890145], [2460793.327113841, 2460793.3272256893], [2460793.32778493, 2460793.328679715], [2460793.328903411, 2460793.3293508035], [2460793.3294626516, 2460793.329686348], [2460793.331028525, 2460793.331364069], [2460793.3319233097, 2460793.3324825503], 
[2460793.3325943984, 2460793.3328180946], [2460793.333153639, 2460793.3334891833], [2460793.3337128796, 2460793.334160272], [2460793.3344958164, 2460793.335055057], [2460793.3355024494, 2460793.33606169], [2460793.336285386, 2460793.3368446263], [2460793.337292019, 2460793.3379631075], [2460793.3380749556, 2460793.338298652], [2460793.338522348, 2460793.3388578924], [2460793.339305285, 2460793.3397526774], [2460793.3398645255, 2460793.3399763736], [2460793.340311918, 2460793.3407593104], [2460793.341765943, 2460793.34254888], [2460793.342772576, 2460793.3431081204], [2460793.343555513, 2460793.345568779], [2460793.345792475, 2460793.3462398676], [2460793.346463564, 2460793.34668726], [2460793.3469109563, 2460793.3471346525], [2460793.3474701964, 2460793.3476938927], [2460793.347805741, 2460793.348029437], [2460793.3484768295, 2460793.3487005257], [2460793.348812374, 2460793.349147918], [2460793.3494834625, 2460793.35082564], [2460793.350937488, 2460793.3511611843], [2460793.3513848805, 2460793.3516085767], [2460793.351832273, 2460793.3521678173], [2460793.3522796654, 2460793.352727058], [2460793.3529507536, 2460793.353398146], [2460793.3538455386, 2460793.354069235], [2460793.354292931, 2460793.355187716], [2460793.3556351084, 2460793.356082501], [2460793.356977286, 2460793.3574246783], [2460793.3575365264, 2460793.357983919], [2460793.358095767, 2460793.3592142477], [2460793.360556425, 2460793.3621222987], [2460793.362569691, 2460793.365925134], [2460793.3661488304, 2460793.3671554634], [2460793.3672673116, 2460793.368609489], [2460793.368833185, 2460793.3712938433], [2460793.3714056914, 2460793.371964932], [2460793.372188628, 2460793.3724123244], [2460793.3726360206, 2460793.3744255905], [2460793.3746492867, 2460793.3777810335], [2460793.3778928816, 2460793.3786758184], [2460793.3788995147, 2460793.3797942996], [2460793.380017996, 2460793.386728882], [2460793.38684073, 2460793.390643566], [2460793.390755414, 2460793.3912028065], [2460793.391538351, 
2460793.3920975914], [2460793.3922094395, 2460793.392992376], [2460793.393216072, 2460793.3936634646], [2460793.393887161, 2460793.3945582495], [2460793.3947819457, 2460793.3953411863], [2460793.3956767307, 2460793.396795212], [2460793.397018908, 2460793.3981373888], [2460793.398361085, 2460793.3988084774], [2460793.3990321737, 2460793.3998151105], [2460793.400150655, 2460793.401269136], [2460793.401380984, 2460793.402163921], [2460793.402387617, 2460793.403729794], [2460793.4039534903, 2460793.404512731], [2460793.4048482752, 2460793.4059667564], [2460793.4061904526, 2460793.4079800225], [2460793.4082037187, 2460793.408427415], [2460793.408651111, 2460793.409993288], [2460793.4104406806, 2460793.410888073], [2460793.4111117693, 2460793.4115591617], [2460793.411782858, 2460793.4124539467], [2460793.412677643, 2460793.4142435165], [2460793.4144672127, 2460793.4149146047], [2460793.415026453, 2460793.4155856934], [2460793.4158093897, 2460793.416256782], [2460793.4164804784, 2460793.4167041746], [2460793.4173752633, 2460793.4178226558], [2460793.418046352, 2460793.4187174407], [2460793.418941137, 2460793.419164833], [2460793.4192766813, 2460793.4196122256], [2460793.419835922, 2460793.4203951624], [2460793.42050701, 2460793.4209544025], [2460793.421178099, 2460793.421513643], [2460793.4220728837, 2460793.42229658], [2460793.422520276, 2460793.4227439724], [2460793.423191365, 2460793.423526909], [2460793.4236387573, 2460793.424421694], [2460793.4245335422, 2460793.4249809347], [2460793.425204631, 2460793.4256520234], [2460793.4258757196, 2460793.4263231116], [2460793.426770504, 2460793.4269942003], [2460793.427553441, 2460793.4278889853], [2460793.4281126815, 2460793.428560074], [2460793.4290074664, 2460793.429678555], [2460793.430349644, 2460793.43057334], [2460793.4307970363, 2460793.431579973], [2460793.4316918207, 2460793.431915517], [2460793.4325866057, 2460793.432810302], [2460793.4335932387, 2460793.4341524793], [2460793.4343761755, 2460793.434823568], 
[2460793.4351591123, 2460793.435718353], [2460793.4375079223, 2460793.4377316185], [2460793.438179011, 2460793.438402707], [2460793.440304125, 2460793.4407515177], [2460793.440975214, 2460793.4415344545], [2460793.441869999, 2460793.442205543], [2460793.4424292394, 2460793.442876632], [2460793.4431003276, 2460793.443659568], [2460793.449699366, 2460793.449811214], [2460793.4501467585, 2460793.450594151], [2460793.452495569, 2460793.452719265], [2460793.4545088345, 2460793.4547325308], [2460793.45775243, 2460793.4581998223], [2460793.4583116705, 2460793.458870911], [2460793.4592064554, 2460793.459653848], [2460793.4598775436, 2460793.46010124], [2460793.4607723285, 2460793.460996025], [2460793.4614434172, 2460793.4616671135], [2460793.4618908097, 2460793.462338202], [2460793.4627855946, 2460793.463009291], [2460793.4635685314, 2460793.4644633164], [2460793.465246253, 2460793.465693645], [2460793.4659173414, 2460793.4661410376], [2460793.466476582, 2460793.4677069113], [2460793.4681543037, 2460793.468489848], [2460793.4687135443, 2460793.4690490887], [2460793.469496481, 2460793.4697201774], [2460793.470391266, 2460793.4706149623], [2460793.4709505066, 2460793.4715097467], [2460793.471957139, 2460793.4721808354], [2460793.47251638, 2460793.4729637722], [2460793.4734111647, 2460793.473746709], [2460793.473858557, 2460793.4740822534], [2460793.4744177978, 2460793.474753342], [2460793.4753125827, 2460793.475648127], [2460793.4759836714, 2460793.4763192157], [2460793.476990304, 2460793.4774376964], [2460793.4776613927, 2460793.4786680257], [2460793.4792272663, 2460793.4795628106], [2460793.479898355, 2460793.4805694437], [2460793.480681292, 2460793.4812405324], [2460793.481799773, 2460793.4823590135], [2460793.4825827093, 2460793.4830301018], [2460793.4835893423, 2460793.484036735], [2460793.4844841273, 2460793.485155216], [2460793.485267064, 2460793.486273697], [2460793.4867210896, 2460793.48728033], [2460793.4873921783, 2460793.488734355], [2460793.4888462033, 
2460793.489405444], [2460793.489517292, 2460793.4901883807], [2460793.490300229, 2460793.4917542543], [2460793.4918661024, 2460793.492537191], [2460793.4926490393, 2460793.4930964317], [2460793.493320128, 2460793.4936556723], [2460793.493991216, 2460793.494662305], [2460793.494886001, 2460793.4952215455], [2460793.49555709, 2460793.4964518747], [2460793.496563723, 2460793.497794052], [2460793.4979059002, 2460793.498576989], [2460793.498688837, 2460793.4994717734], [2460793.4996954696, 2460793.5022679763], [2460793.5024916725, 2460793.562106713]]

freq_flags: [[46859741.2109375, 50765991.2109375], [50888061.5234375, 51254272.4609375], [51498413.0859375, 51742553.7109375], [51986694.3359375, 53451538.0859375], [53695678.7109375, 53939819.3359375], [54306030.2734375, 54794311.5234375], [54916381.8359375, 55160522.4609375], [56747436.5234375, 56869506.8359375], [57235717.7734375, 59432983.3984375], [60287475.5859375, 63339233.3984375], [63461303.7109375, 65170288.0859375], [66024780.2734375, 66879272.4609375], [67123413.0859375, 67367553.7109375], [67733764.6484375, 67977905.2734375], [68954467.7734375, 69198608.3984375], [69931030.2734375, 70053100.5859375], [73593139.6484375, 73715209.9609375], [73837280.2734375, 75302124.0234375], [75424194.3359375, 75546264.6484375], [77011108.3984375, 77133178.7109375], [77621459.9609375, 79208374.0234375], [79330444.3359375, 80307006.8359375], [80429077.1484375, 80795288.0859375], [81039428.7109375, 81283569.3359375], [82260131.8359375, 82382202.1484375], [83847045.8984375, 84091186.5234375], [84335327.1484375, 84579467.7734375], [85189819.3359375, 85556030.2734375], [85800170.8984375, 86044311.5234375], [86166381.8359375, 86776733.3984375], [86898803.7109375, 108505249.0234375], [108627319.3359375, 109237670.8984375], [109970092.7734375, 110092163.0859375], [111923217.7734375, 113876342.7734375], [116073608.3984375, 116195678.7109375], [116439819.3359375, 116561889.6484375], [116683959.9609375, 116806030.2734375], [120223999.0234375, 120346069.3359375], [123153686.5234375, 126571655.2734375], [127426147.4609375, 132064819.3359375], [136337280.2734375, 136459350.5859375], [136947631.8359375, 138168334.9609375], [138656616.2109375, 138778686.5234375], [139877319.3359375, 139999389.6484375], [141464233.3984375, 141830444.3359375], [142074584.9609375, 142318725.5859375], [142440795.8984375, 144149780.2734375], [145492553.7109375, 146102905.2734375], [146224975.5859375, 146347045.8984375], [147323608.3984375, 147567749.0234375], [148178100.5859375, 148422241.2109375], 
[149032592.7734375, 149398803.7109375], [149642944.3359375, 150131225.5859375], [152938842.7734375, 153182983.3984375], [153427124.0234375, 153549194.3359375], [154159545.8984375, 154403686.5234375], [155014038.0859375, 155258178.7109375], [155990600.5859375, 156112670.8984375], [156845092.7734375, 156967163.0859375], [157577514.6484375, 157699584.9609375], [157943725.5859375, 158065795.8984375], [158187866.2109375, 158309936.5234375], [159164428.7109375, 159286499.0234375], [160140991.2109375, 160385131.8359375], [161361694.3359375, 161483764.6484375], [163070678.7109375, 166122436.5234375], [166244506.8359375, 168197631.8359375], [169906616.2109375, 170150756.8359375], [170272827.1484375, 170394897.4609375], [170516967.7734375, 170639038.0859375], [170883178.7109375, 171005249.0234375], [171249389.6484375, 171371459.9609375], [171737670.8984375, 171981811.5234375], [174911499.0234375, 175521850.5859375], [178451538.0859375, 178573608.3984375], [179183959.9609375, 182479858.3984375], [183090209.9609375, 183334350.5859375], [187362670.8984375, 187728881.8359375], [189193725.5859375, 189437866.2109375], [189926147.4609375, 190048217.7734375], [191024780.2734375, 191513061.5234375], [191635131.8359375, 191757202.1484375], [193222045.8984375, 193344116.2109375], [194442749.0234375, 194564819.3359375], [195663452.1484375, 195785522.4609375], [197128295.8984375, 197372436.5234375], [198104858.3984375, 198348999.0234375], [198959350.5859375, 199569702.1484375], [199935913.0859375, 200057983.3984375], [200668334.9609375, 202133178.7109375], [202377319.3359375, 206039428.7109375], [207138061.5234375, 207382202.1484375], [208480834.9609375, 208724975.5859375], [209945678.7109375, 210067749.0234375], [212020874.0234375, 212265014.6484375], [213119506.8359375, 213363647.4609375], [215072631.8359375, 215316772.4609375], [220565795.8984375, 221542358.3984375], [222763061.5234375, 223861694.3359375], [225692749.0234375, 225814819.3359375], [227279663.0859375, 227767944.3359375], 
[229110717.7734375, 229476928.7109375], [229843139.6484375, 230087280.2734375], [230941772.4609375, 231307983.3984375], [233993530.2734375, 234359741.2109375]]

ex_ants: [[4, Jee], [7, Jee], [7, Jnn], [8, Jee], [8, Jnn], [10, Jee], [15, Jee], [15, Jnn], [18, Jee], [18, Jnn], [20, Jee], [20, Jnn], [21, Jee], [27, Jee], [27, Jnn], [28, Jee], [28, Jnn], [30, Jee], [30, Jnn], [32, Jnn], [33, Jee], [33, Jnn], [34, Jee], [35, Jnn], [37, Jee], [37, Jnn], [40, Jnn], [42, Jee], [42, Jnn], [45, Jee], [46, Jee], [47, Jnn], [51, Jee], [56, Jee], [56, Jnn], [60, Jnn], [62, Jee], [64, Jnn], [66, Jee], [66, Jnn], [67, Jnn], [68, Jee], [68, Jnn], [70, Jee], [70, Jnn], [71, Jee], [71, Jnn], [72, Jee], [72, Jnn], [75, Jee], [76, Jee], [76, Jnn], [77, Jnn], [78, Jee], [81, Jnn], [82, Jnn], [86, Jee], [86, Jnn], [87, Jee], [92, Jee], [97, Jnn], [98, Jnn], [99, Jnn], [102, Jnn], [104, Jnn], [105, Jee], [107, Jee], [107, Jnn], [108, Jnn], [109, Jnn], [113, Jnn], [115, Jee], [117, Jee], [120, Jee], [120, Jnn], [121, Jee], [125, Jee], [125, Jnn], [130, Jee], [130, Jnn], [134, Jee], [135, Jee], [136, Jnn], [137, Jee], [143, Jnn], [148, Jee], [153, Jnn], [155, Jnn], [161, Jnn], [166, Jee], [166, Jnn], [167, Jnn], [170, Jee], [172, Jnn], [173, Jnn], [174, Jnn], [175, Jnn], [176, Jnn], [180, Jee], [180, Jnn], [182, Jee], [184, Jee], [184, Jnn], [185, Jee], [185, Jnn], [186, Jee], [186, Jnn], [188, Jnn], [194, Jnn], [197, Jnn], [199, Jnn], [200, Jee], [200, Jnn], [202, Jnn], [204, Jnn], [206, Jnn], [208, Jnn], [209, Jnn], [212, Jnn], [213, Jee], [214, Jee], [214, Jnn], [218, Jnn], [227, Jee], [227, Jnn], [231, Jee], [231, Jnn], [236, Jee], [236, Jnn], [238, Jnn], [239, Jee], [240, Jee], [240, Jnn], [244, Jee], [250, Jee], [251, Jee], [251, Jnn], [252, Jnn], [253, Jnn], [254, Jee], [254, Jnn], [255, Jee], [255, Jnn], [257, Jee], [257, Jnn], [262, Jee], [262, Jnn], [266, Jee], [266, Jnn], [267, Jee], [267, Jnn], [268, Jee], [268, Jnn], [269, Jee], [269, Jnn], [271, Jee], [271, Jnn], [273, Jee], [273, Jnn], [281, Jnn], [282, Jee], [282, Jnn], [283, Jee], [283, Jnn], [284, Jee], [284, Jnn], [286, Jee], [286, Jnn], [320, Jee], [320, Jnn], [321, Jee], [321, 
Jnn], [322, Jee], [322, Jnn], [323, Jee], [323, Jnn], [324, Jee], [324, Jnn], [325, Jee], [325, Jnn], [326, Jee], [326, Jnn], [327, Jee], [327, Jnn], [328, Jee], [328, Jnn], [329, Jee], [329, Jnn], [331, Jee], [331, Jnn], [332, Jee], [332, Jnn], [333, Jee], [333, Jnn], [336, Jee], [336, Jnn], [340, Jee], [340, Jnn]]

Metadata¶

In [24]:
# Print the installed version of each package this notebook relies on, one
# "<package>: <version>" line per package.
from importlib import import_module

for repo in ['hera_cal', 'hera_qm', 'hera_filters', 'hera_notebook_templates', 'pyuvdata']:
    # import_module resolves the package by name directly, so no source text
    # is generated and exec-ed, and no global __version__ is rebound on each
    # pass through the loop.
    print(f'{repo}: {import_module(repo).__version__}')
hera_cal: 3.7.1.dev45+g4a0c6f1
hera_qm: 2.2.1.dev2+ga535e9e
hera_filters: 0.1.6.dev9+gf165ec1
hera_notebook_templates: 0.1.dev989+gee0995d
pyuvdata: 3.1.3
In [25]:
# Report the wall-clock time elapsed since tstart was recorded, in minutes.
elapsed_minutes = (time.time() - tstart) / 60
print(f'Finished execution in {elapsed_minutes:.2f} minutes.')
Finished execution in 30.99 minutes.