Full Day Antenna Flagging¶

by Josh Dillon, last updated February 28, 2023

This notebook is designed to harmonize the potentially inconsistent per-antenna flagging coming out of file_calibration notebook. It seeks to flag likely bad times between known bad times and to impose maximum flagging factions and maximum stretches of consecutive flags between otherwise good data (which can raise problems when smoothing or filtering later).

Here's a set of links to skip to particular figures and tables:

• Figure 1: Flag Summary vs. JD¶

• Figure 2: Array Flag Fraction Summary¶

• Figure 3: Flag Fraction vs. JD Summary¶

• Figure 4: Per-Antenna Flag Harmonization Summary¶

In [1]:
import time
tstart = time.time()
In [2]:
import os
os.environ['HDF5_USE_FILE_LOCKING'] = 'FALSE'
import h5py
import hdf5plugin  # REQUIRED to have the compression plugins available
import numpy as np
import pandas as pd
import glob
import copy
import matplotlib
import matplotlib.pyplot as plt
from pyuvdata import UVFlag, UVData, UVCal
from hera_cal import io, utils
from hera_qm import ant_class
from uvtools.plot import plot_antpos, plot_antclass
from scipy.ndimage import convolve
%matplotlib inline
from IPython.display import display, HTML

Parse inputs¶

In [3]:
# get files
SUM_FILE = os.environ.get("SUM_FILE", None)
# SUM_FILE = '/users/jsdillon/lustre/H6C/abscal/2459853/zen.2459853.25518.sum.uvh5'
SUM_SUFFIX = os.environ.get("SUM_SUFFIX", 'sum.uvh5')
CAL_SUFFIX = os.environ.get("CAL_SUFFIX", 'sum.omni.calfits')
ANT_CLASS_SUFFIX = os.environ.get("ANT_CLASS_SUFFIX", 'ant_class.csv')
OUT_FLAG_SUFFIX = os.environ.get("OUT_FLAG_SUFFIX", 'sum.antenna_flags.h5')

# Parameters for harmonizing partially-flagged antennas
SMOOTHING_SCALE_NFILES = float(os.environ.get("SMOOTHING_SCALE_NFILES", 30))
MAX_FLAG_GAP_NFILES = int(os.environ.get("MAX_FLAG_GAP_NFILES", 30))

# Max flag fractions (before just flagging the whole antenna)
POWER_MAX_FLAG_FRAC = float(os.environ.get("POWER_MAX_FLAG_FRAC", .5))
AUTO_POWER_MAX_FLAG_FRAC = float(os.environ.get("AUTO_POWER_MAX_FLAG_FRAC", .5))
AUTO_SHAPE_MAX_FLAG_FRAC = float(os.environ.get("AUTO_SHAPE_MAX_FLAG_FRAC", .25))
AUTO_SLOPE_MAX_FLAG_FRAC = float(os.environ.get("AUTO_SLOPE_MAX_FLAG_FRAC", .25))
AUTO_RFI_MAX_FLAG_FRAC = float(os.environ.get("AUTO_RFI_MAX_FLAG_FRAC", .25))
CHISQ_MAX_FLAG_FRAC = float(os.environ.get("AUTO_RFI_MAX_FLAG_FRAC", .5))
OVERALL_MAX_FLAG_FRAC = float(os.environ.get("OVERALL_MAX_FLAG_FRAC", .5))

for setting in ['SUM_FILE', 'SUM_SUFFIX', 'CAL_SUFFIX', 'ANT_CLASS_SUFFIX', 'OUT_FLAG_SUFFIX', 'SMOOTHING_SCALE_NFILES', 
                'MAX_FLAG_GAP_NFILES', 'POWER_MAX_FLAG_FRAC', 'AUTO_POWER_MAX_FLAG_FRAC', 'AUTO_SHAPE_MAX_FLAG_FRAC',
                'AUTO_SLOPE_MAX_FLAG_FRAC', 'AUTO_RFI_MAX_FLAG_FRAC', 'CHISQ_MAX_FLAG_FRAC', 'OVERALL_MAX_FLAG_FRAC']:
        print(f'{setting} = {eval(setting)}')
SUM_FILE = /mnt/sn1/2460119/zen.2460119.42130.sum.uvh5
SUM_SUFFIX = sum.uvh5
CAL_SUFFIX = sum.omni.calfits
ANT_CLASS_SUFFIX = sum.ant_class.csv
OUT_FLAG_SUFFIX = sum.antenna_flags.h5
SMOOTHING_SCALE_NFILES = 30.0
MAX_FLAG_GAP_NFILES = 30
POWER_MAX_FLAG_FRAC = 30.0
AUTO_POWER_MAX_FLAG_FRAC = 0.5
AUTO_SHAPE_MAX_FLAG_FRAC = 0.25
AUTO_SLOPE_MAX_FLAG_FRAC = 0.25
AUTO_RFI_MAX_FLAG_FRAC = 0.25
CHISQ_MAX_FLAG_FRAC = 0.25
OVERALL_MAX_FLAG_FRAC = 0.5
In [4]:
# ant_metrics bounds for low correlation / dead antennas
am_corr_bad = (0, float(os.environ.get("AM_CORR_BAD", 0.3)))
am_corr_suspect = (float(os.environ.get("AM_CORR_BAD", 0.3)), float(os.environ.get("AM_CORR_SUSPECT", 0.5)))

# ant_metrics bounds for cross-polarized antennas
am_xpol_bad = (-1, float(os.environ.get("AM_XPOL_BAD", -0.1)))
am_xpol_suspect = (float(os.environ.get("AM_XPOL_BAD", -0.1)), float(os.environ.get("AM_XPOL_SUSPECT", 0)))

# bounds on solar altitude (in degrees)
good_solar_altitude = (-90, float(os.environ.get("SUSPECT_SOLAR_ALTITUDE", 0)))
suspect_solar_altitude = (float(os.environ.get("SUSPECT_SOLAR_ALTITUDE", 0)), 90)

# bounds on zeros in spectra
good_zeros_per_eo_spectrum = (0, int(os.environ.get("MAX_ZEROS_PER_EO_SPEC_GOOD", 2)))
suspect_zeros_per_eo_spectrum = (0, int(os.environ.get("MAX_ZEROS_PER_EO_SPEC_SUSPECT", 8)))

# bounds on autocorrelation power
auto_power_good = (float(os.environ.get("AUTO_POWER_GOOD_LOW", 5)), float(os.environ.get("AUTO_POWER_GOOD_HIGH", 30)))
auto_power_suspect = (float(os.environ.get("AUTO_POWER_SUSPECT_LOW", 1)), float(os.environ.get("AUTO_POWER_SUSPECT_HIGH", 60)))

# bounds on autocorrelation slope
auto_slope_good = (float(os.environ.get("AUTO_SLOPE_GOOD_LOW", -0.4)), float(os.environ.get("AUTO_SLOPE_GOOD_HIGH", 0.4)))
auto_slope_suspect = (float(os.environ.get("AUTO_SLOPE_SUSPECT_LOW", -0.6)), float(os.environ.get("AUTO_SLOPE_SUSPECT_HIGH", 0.6)))

# bounds on autocorrelation RFI
auto_rfi_good = (0, float(os.environ.get("AUTO_RFI_GOOD", 0.015)))
auto_rfi_suspect = (0, float(os.environ.get("AUTO_RFI_SUSPECT", 0.03)))

# bounds on autocorrelation shape
auto_shape_good = (0, float(os.environ.get("AUTO_SHAPE_GOOD", 0.1)))
auto_shape_suspect = (0, float(os.environ.get("AUTO_SHAPE_SUSPECT", 0.2)))

# bounds on chi^2 per antenna in omnical
oc_cspa_good = (0, float(os.environ.get("OC_CSPA_GOOD", 2)))
oc_cspa_suspect = (0, float(os.environ.get("OC_CSPA_SUSPECT", 3)))

OC_SKIP_OUTRIGGERS = os.environ.get("OC_SKIP_OUTRIGGERS", "TRUE").upper() == "TRUE"

for bound in ['am_corr_bad', 'am_corr_suspect', 'am_xpol_bad', 'am_xpol_suspect', 
              'good_solar_altitude', 'suspect_solar_altitude',
              'good_zeros_per_eo_spectrum', 'suspect_zeros_per_eo_spectrum',
              'auto_power_good', 'auto_power_suspect', 'auto_slope_good', 'auto_slope_suspect',
              'auto_rfi_good', 'auto_rfi_suspect', 'auto_shape_good', 'auto_shape_suspect',
              'oc_cspa_good', 'oc_cspa_suspect', 'OC_SKIP_OUTRIGGERS']:
    print(f'{bound} = {eval(bound)}')
am_corr_bad = (0, 0.2)
am_corr_suspect = (0.2, 0.4)
am_xpol_bad = (-1, -0.1)
am_xpol_suspect = (-0.1, 0.0)
good_solar_altitude = (-90, 0.0)
suspect_solar_altitude = (0.0, 90)
good_zeros_per_eo_spectrum = (0, 2)
suspect_zeros_per_eo_spectrum = (0, 8)
auto_power_good = (5.0, 30.0)
auto_power_suspect = (1.0, 60.0)
auto_slope_good = (-0.4, 0.4)
auto_slope_suspect = (-0.6, 0.6)
auto_rfi_good = (0, 0.015)
auto_rfi_suspect = (0, 0.03)
auto_shape_good = (0, 0.1)
auto_shape_suspect = (0, 0.2)
oc_cspa_good = (0, 2.0)
oc_cspa_suspect = (0, 3.0)
OC_SKIP_OUTRIGGERS = True

Load data¶

In [5]:
hd = io.HERAData(SUM_FILE)
In [6]:
sum_glob = '.'.join(SUM_FILE.split('.')[:-3]) + '.*.' + SUM_SUFFIX
cal_files_glob = sum_glob.replace(SUM_SUFFIX, CAL_SUFFIX)
cal_files = sorted(glob.glob(cal_files_glob))
print(f'Found {len(cal_files)} *.{CAL_SUFFIX} files starting with {cal_files[0]}.')
Found 359 *.sum.omni.calfits files starting with /mnt/sn1/2460119/zen.2460119.42130.sum.omni.calfits.
In [7]:
ant_class_csvs_glob = sum_glob.replace(SUM_SUFFIX, ANT_CLASS_SUFFIX)
ant_class_csvs = sorted(glob.glob(ant_class_csvs_glob))
jds = [float(f.split('/')[-1].split('zen.')[-1].split('.sum')[0]) for f in ant_class_csvs]
frac_jds = jds - np.floor(jds[0])
Ncsvs = len(ant_class_csvs)
print(f'Found {Ncsvs} *.{ANT_CLASS_SUFFIX} files starting with {ant_class_csvs[0]}.')
Found 359 *.sum.ant_class.csv files starting with /mnt/sn1/2460119/zen.2460119.42130.sum.ant_class.csv.
In [8]:
# Load ant_class csvs
tables = [pd.read_csv(f).dropna(axis=0, how='all') for f in ant_class_csvs]
table_cols = tables[0].columns[1::2]
class_cols = tables[0].columns[2::2]
ap_strs = np.array(tables[0]['Antenna'])
ants = sorted(set(int(a[:-1]) for a in ap_strs))
In [9]:
# build up dictionaries for full night metrics and classifications
replace = {'-': np.nan, 'INF': np.inf, 'No': False, 'Yes': True}

metric_data = {tc: {} for tc in table_cols}
class_data = {tc: {} for tc in table_cols}
for tc, cc in zip(table_cols, class_cols):
    class_array = np.vstack([t[cc] for t in tables]).T
    metric_array = np.vstack([t[tc] for t in tables]).T
    for ca, ma, ap_str in zip(class_array, metric_array, ap_strs):
        class_data[tc][ap_str] = ca
        if tc == 'Antenna':
            metric_data[tc][ap_str] = ma
        else:
            metric_data[tc][ap_str] = np.array([replace[val] if val in replace else float(val) for val in ma])
In [10]:
original_flags = {ap_str: np.array(class_data['Antenna'][ap_str] == 'bad') for ap_str in ap_strs}
print(f'{np.mean(list(original_flags.values())):.2%} of antenna-files flagged by RTP.')
47.18% of antenna-files flagged by RTP.

Flagging helper functions¶

In [11]:
def true_stretches(bool_arr):
    '''Returns a list of slices corresponding to contiguous sequences where bool_arr is True.'''
    # find the indices where bool_arr changes from True to False or vice versa
    diff = np.diff(bool_arr.astype(int))
    starts = np.where(diff == 1)[0] + 1
    ends = np.where(diff == -1)[0]
    
    # handle first and last values
    if bool_arr[0]:
        starts = np.insert(starts, 0, 0)
    if bool_arr[-1]:
        ends = np.append(ends, len(bool_arr) - 1)

    stretches = [slice(starts[i], ends[i] + 1) for i in range(len(starts))]
    return stretches
In [12]:
def impose_max_flag_gap(flags, max_flag_gap=30):
    '''Adds flags to limit the largest possible number of flagged files between unflagged files.
    
    Arguments:
        flags_in: 1D boolean numpy array of starting flags (modified in place)
        max_flag_gap: integer maximum allowed sequential flags (default 30)
    '''
    bad_stretches = sorted(true_stretches(flags), key=lambda s: s.stop - s.start)[::-1]
    for bs in bad_stretches:
        if bs.stop - bs.start > max_flag_gap:
            # figure out whether to flag everything before or after this gap
            if np.sum(~flags[bs.stop:]) > np.sum(~flags[:bs.start]):
                flags[:bs.start] = True
            else:
                flags[bs.stop:] = True
                
    return flags
In [13]:
def metric_convolution_flagging(metric, starting_flags, ok_range, sigma=30, max_flag_gap=30):
    '''Grows flags by looking at whether the given metric returns to some OK range in the 
    gap between flags. Also imposes a max_flag_gap (see impose_max_flag_gap()).
    
    Arguments:
        metric: 1D numpy array of floats containing the per-file metric values used for flagging.
        starting_flags: 1D numpy array of booleans of initial flags
        ok_range: length-2 tuple of metric range outside of which flags are flags are considered bad
        sigma: standard deviation of metric Gaussian smoothing scale (in units of files)
        max_flag_gap: integer maximum allowed sequential flags (default 30)
    Returns:
        new_flags: starting flags, but with possible additional flags
    '''
    
    # compute convolved metric
    kernel = np.exp(-np.arange(-len(metric) // 2, len(metric) // 2 + 1)**2 / 2 / sigma**2)
    kernel /= np.sum(kernel)
    convolved_metric = convolve(metric, kernel, mode='reflect')

    new_flags = np.array(starting_flags)
    nflags = np.sum(new_flags)
    while True:
        # figure out which bad stretches are "persistant" and which are just blips
        bad_stretches = true_stretches(new_flags)
        persistant_bad_stretches = [bs for bs in bad_stretches if 
                                    np.any((convolved_metric[bs] > ok_range[1]) | (convolved_metric[bs] < ok_range[0]))]

        # figure out which stretches that aren't "persistant" bad eventually go back to OK... don't flag these 
        not_persistant_bad = np.ones_like(new_flags)
        for bs in bad_stretches:
            if np.any(convolved_metric[bs] > ok_range[1]):
                not_persistant_bad[bs] = False
        for stretch in true_stretches(not_persistant_bad):
            if not np.any((convolved_metric[stretch] > ok_range[0]) & (convolved_metric[stretch] < ok_range[1])):
                new_flags[stretch] = True            

        impose_max_flag_gap(new_flags, max_flag_gap=max_flag_gap)
            
        # check if flags haven't grown and if so, break
        if np.sum(new_flags) == nflags:
            return new_flags
        else:
            nflags = np.sum(new_flags)   
In [14]:
class FlagHistory:
    '''Helps keep strack of why flags get applied'''
    
    def __init__(self, ap_strs, Nfiles):
        self.final_flags = {ap_str: np.zeros(Nfiles, dtype=bool) for ap_str in ap_strs}
        self.history = {}
    
    def update(self, ap_str, rtp_flags, updated_flags, rationale):
        self.history[(ap_str, rationale)] = (np.array(self.final_flags[ap_str]), np.array(rtp_flags), np.array(updated_flags))
        self.final_flags[ap_str] |= updated_flags
        
    def summarize(self, description):
        print(f'{np.mean(list(self.final_flags.values())):.2%} of antenna-files flagged after {description}.')

Perform flagging¶

In [15]:
fh = FlagHistory(ap_strs, Ncsvs)

# STEP 1: FLAG SUN UP DATA
for ap_str in ap_strs:
    sun_up = metric_data['Solar Alt'][ap_str] > good_solar_altitude[1]
    fh.update(ap_str, sun_up, sun_up, 'Solar Alt')
solar_flags = np.all([metric_data['Solar Alt'][ap_str] > good_solar_altitude[1] for ap_str in ap_strs], axis=0)
fh.summarize('flagging sun-up data')

# STEP 2: FLAG TOTALLY DEAD ANTENNAS
for ap_str in ap_strs:
    fh.update(ap_str, metric_data['Dead?'][ap_str], metric_data['Dead?'][ap_str], 'Dead?')
fh.summarize('removing dead antennas')    

# STEP 3: FLAG OUTRIGGERS
if OC_SKIP_OUTRIGGERS:
    for ap_str in ap_strs:
        if int(ap_str[:-1]) >= 320:
            fh.update(ap_str, np.ones(Ncsvs, dtype=bool), np.ones(Ncsvs, dtype=bool), 'Outrigger')
    fh.summarize('flagging outriggers')

# STEP 4: FLAG CROSS-POLARIZED ANTENNAS
for ap_str in ap_strs:
    if np.mean(metric_data['Cross-Polarized'][ap_str]) < am_xpol_bad[1]:
        fh.update(ap_str, class_data['Cross-Polarized'][ap_str] == 'bad', np.ones(Ncsvs, dtype=bool), 'Cross-Polarized')
fh.summarize('removing cross-polarized antennas')
    
# STEP 5: FLAG POORLY-CORRELATING ANTENNAS
for ap_str in ap_strs:
    if np.mean(metric_data['Low Correlation'][ap_str]) < am_corr_bad[1]:
        fh.update(ap_str, class_data['Low Correlation'][ap_str] == 'bad', np.ones(Ncsvs, dtype=bool), 'Cross-Polarized')
fh.summarize('removing non-correlating antennas')

# STEP 6: FLAG ON AUTOCORRELATIONS
for category, ok_range, max_flag_frac in zip(['Autocorr Power', 'Autocorr Shape', 'Autocorr Slope', 'RFI in Autos'],
                                              [auto_power_suspect, auto_shape_good, auto_slope_good, auto_rfi_good],
                                              [AUTO_POWER_MAX_FLAG_FRAC, AUTO_SHAPE_MAX_FLAG_FRAC, AUTO_SLOPE_MAX_FLAG_FRAC, AUTO_RFI_MAX_FLAG_FRAC]):
    for ap_str in ap_strs:
        # apply RTP flags for these categories
        rtp_flags = (class_data[category][ap_str] == 'bad')
        # if not completely flagged or completely unflagged, grow flags via convolution
        if np.any(rtp_flags) and not np.all(fh.final_flags[ap_str] | rtp_flags):
            metric = metric_data[category][ap_str]    
            new_flags = metric_convolution_flagging(metric, rtp_flags, ok_range, sigma=SMOOTHING_SCALE_NFILES, max_flag_gap=MAX_FLAG_GAP_NFILES)
            # if too many times are flagged for this category, flag the whole antenna (excluding sun-up times)
            if np.mean(new_flags[~solar_flags]) > max_flag_frac:
                new_flags[:] = True
        else:
            new_flags = rtp_flags
        fh.update(ap_str, rtp_flags, new_flags, category)
    fh.summarize(f'flagging antennas for {category}')
    
# STEP 7: FLAG FOR HIGH CHI^2
for ap_str in ap_strs:
    flagged_for_cspa = (~fh.final_flags[ap_str]) & (class_data['Even/Odd Zeros'][ap_str] != 'bad') & (class_data['Redcal chi^2'][ap_str] == 'bad')
    if np.any(flagged_for_cspa) and not np.all(fh.final_flags[ap_str] | flagged_for_cspa):
        cspa = metric_data['Redcal chi^2'][ap_str]
        new_flags = metric_convolution_flagging(cspa, flagged_for_cspa, oc_cspa_suspect, sigma=SMOOTHING_SCALE_NFILES, max_flag_gap=MAX_FLAG_GAP_NFILES)
        if np.mean(new_flags[~solar_flags]) > CHISQ_MAX_FLAG_FRAC:
            new_flags[:] = True
    else:
        new_flags = flagged_for_cspa
    fh.update(ap_str, flagged_for_cspa, new_flags, 'Redcal chi^2')
fh.summarize('flagging antennas for high redcal chi^2')

# STEP 8: FLAG FOR TOO EVEN/ODD ZEROS (USUALLY PACKET LOSS)
for ap_str in ap_strs:
    rtp_flags = (class_data['Even/Odd Zeros'][ap_str] == 'bad')
    fh.update(ap_str, rtp_flags, rtp_flags, 'Even/Odd Zeros')
fh.summarize('flagging antennas for excess even/odd zeros')

# STEP 9: FLAG ANTENNAS THAT ARE ALREADY LARGELY FLAGGED
for ap_str in ap_strs:
    new_flags = np.array(fh.final_flags[ap_str])
    impose_max_flag_gap(new_flags)
    if np.mean(new_flags[~solar_flags]) > OVERALL_MAX_FLAG_FRAC:
        new_flags[:] = True
    fh.update(ap_str, fh.final_flags[ap_str], new_flags, 'Frequently Flagged')
fh.summarize('flagging frequently-flagged antennas')
0.00% of antenna-files flagged after flagging sun-up data.
27.76% of antenna-files flagged after removing dead antennas.
30.21% of antenna-files flagged after flagging outriggers.
30.21% of antenna-files flagged after removing cross-polarized antennas.
35.58% of antenna-files flagged after removing non-correlating antennas.
42.67% of antenna-files flagged after flagging antennas for Autocorr Power.
46.52% of antenna-files flagged after flagging antennas for Autocorr Shape.
46.63% of antenna-files flagged after flagging antennas for Autocorr Slope.
47.72% of antenna-files flagged after flagging antennas for RFI in Autos.
49.86% of antenna-files flagged after flagging antennas for high redcal chi^2.
49.89% of antenna-files flagged after flagging antennas for excess even/odd zeros.
49.89% of antenna-files flagged after flagging frequently-flagged antennas.

Plotting¶

In [16]:
def flagging_board():
    cmap = matplotlib.colors.ListedColormap(['blue', 'black', 'red', 'orange'])
    to_plot = np.vstack([np.where(~fh.final_flags[ap_str] & ~original_flags[ap_str], 0,
                                  np.where(~fh.final_flags[ap_str] & original_flags[ap_str], -1,
                                           np.where(fh.final_flags[ap_str] & ~original_flags[ap_str], 2, 1))) for ap_str in ap_strs])

    plt.figure(figsize=(14, len(ants) / 10), dpi=100)
    plt.imshow(to_plot, aspect='auto', interpolation='none', cmap=cmap, vmin=-1.5, vmax=2.5,
               extent=[frac_jds[0], frac_jds[-1], len(ants), 0])
    plt.xlabel(f'JD - {int(jds[0])}')
    plt.yticks(ticks=np.arange(.5, len(ants)+.5), labels=[ant for ant in ants], fontsize=6)
    plt.ylabel('Antenna Number (East First, Then North)')
    plt.gca().tick_params(right=True, top=True, labelright=True, labeltop=True)
    cbar = plt.colorbar(location='top', aspect=40)
    cbar.set_ticks([-1, 0, 1, 2])
    cbar.set_ticklabels(['RTP Flag Removed', 'No Flags', 'Flagged by RTP and Here', 'Flagged Here Only'])
    plt.tight_layout()

Figure 1: Flag Summary vs. JD¶

This figure summarizes the flagging harmonization performed in this notebook, showing which flags were added (or potentially removed).

In [17]:
flagging_board()
In [18]:
def flag_frac_array_plot():
    fig, axes = plt.subplots(1, 2, figsize=(14, 8), dpi=100, gridspec_kw={'width_ratios': [2, 1]})

    def flag_frac_panel(ax, antnums, radius=7, legend=False):

        ang_dict = {'e': (225, 405), 'n': (45, 225)}

        xpos = np.array([hd.antpos[antnum][0] for antnum in ants if antnum in antnums])
        ypos = np.array([hd.antpos[antnum][1] for antnum in ants if antnum in antnums])
        scatter = ax.scatter(xpos, ypos, c='w', s=0)
        for ap_str in ap_strs:
            antnum, pol = int(ap_str[:-1]), ap_str[-1]
            if antnum in antnums:
                ax.add_artist(matplotlib.patches.Wedge(tuple(hd.antpos[antnum][0:2]), radius, *ang_dict[pol], color='grey'))
                flag_frac = np.mean(fh.final_flags[ap_str][~solar_flags])
                if flag_frac > .05:
                    ax.add_artist(matplotlib.patches.Wedge(tuple(hd.antpos[antnum][0:2]), radius * np.sqrt(flag_frac), *ang_dict[pol], color='r'))
                ax.text(hd.antpos[antnum][0], hd.antpos[antnum][1], str(antnum), color='w',  va='center', ha='center', zorder=100)

        ax.axis('equal')
        ax.set_xlim([np.min(xpos) - radius * 2, np.max(xpos) + radius * 2])
        ax.set_ylim([np.min(ypos) - radius * 2, np.max(ypos) + radius * 2])
        ax.set_xlabel("East-West Position (meters)", size=12)
        ax.set_ylabel("North-South Position (meters)", size=12)

        if legend:
            legend_objs = []
            legend_labels = []

            legend_objs.append(matplotlib.lines.Line2D([0], [0], marker='o', color='w', markeredgecolor='grey', markerfacecolor='grey', markersize=15))
            unflagged_nights = lambda pol: np.sum([np.mean(~fh.final_flags[ap_str][~solar_flags]) for ap_str in ap_strs if ap_str[-1] == pol])
            legend_labels.append((' \u2571\n').join([f'{unflagged_nights(pol):.1f} unflagged {pol}-polarized\nantenna-nights.' for pol in ['e', 'n']]))

            legend_objs.append(matplotlib.lines.Line2D([0], [0], marker='o', color='w', markeredgecolor='red', markerfacecolor='red', markersize=15))
            unflagged_nights = lambda pol: np.sum([np.mean(fh.final_flags[ap_str][~solar_flags]) for ap_str in ap_strs if ap_str[-1] == pol])
            legend_labels.append((' \u2571\n').join([f'{unflagged_nights(pol):.1f} flagged {pol}-polarized\nantenna-nights.' for pol in ['e', 'n']]))        
            ax.legend(legend_objs, legend_labels, ncol=1, fontsize=12)

    flag_frac_panel(axes[0], [ant for ant in ants if ant < 320], radius=7)
    flag_frac_panel(axes[1], [ant for ant in ants if ant >= 320], radius=50, legend=True)
    plt.tight_layout()

Figure 2: Array Flag Fraction Summary¶

Flagging fraction of nighttime data for each antpol. Top-left semicircles are North-South polarized antpols; bottom right semicircles are East-West polarized antpols. Flag fraction is proportional to red area of each semicircle. Left panel is core antennas, right panel is outriggers.

In [19]:
flag_frac_array_plot()
In [20]:
def flag_summary_vs_jd():
    plt.figure(figsize=(14, 5), dpi=100,)
    plt.plot(frac_jds, np.mean(np.vstack([fh.final_flags[ap_str] for ap_str in ap_strs if ap_str[-1] == 'e']), axis=0), '.', ms=2, label='EW-Polarized')
    plt.plot(frac_jds, np.mean(np.vstack([fh.final_flags[ap_str] for ap_str in ap_strs if ap_str[-1] == 'n']), axis=0), '.', ms=2, label='NS-Polarized')
    plt.plot(frac_jds, np.mean(np.vstack([fh.final_flags[ap_str] for ap_str in ap_strs]), axis=0), 'k.', ms=3, label='All Antpols')
    plt.legend()
    plt.xlabel(f'JD - {int(np.floor(jds[0]))}')
    plt.ylabel('Fraction of Antennas Flagged')
    plt.tight_layout()

Figure 3: Flag Fraction vs. JD Summary¶

This plot shows the fraction of the array that's flagged for any reason as a function of time, both overall and per-polarization.

In [21]:
flag_summary_vs_jd()
In [22]:
def class_to_int(c):
    return np.where(c == 'bad', 1.7, np.where(c=='suspect', 1, 0))

def per_antenna_flag_harmonization_plots():
    # compute convolution kernel
    kernel = np.exp(-np.arange(-Ncsvs // 2, Ncsvs // 2 + 1)**2 / 2 / SMOOTHING_SCALE_NFILES**2)
    kernel /= np.sum(kernel)
    
    # JD computations
    djd = np.median(np.diff(jds))

    # loop over ant-pols
    for ap_str in ap_strs:
        # if there are new nighttime flags
        if np.sum(fh.final_flags[ap_str][~solar_flags]) > np.sum(original_flags[ap_str][~solar_flags]):
            # if the new flags aren't just because of the OVERALL_MAX_FLAG_FRAC
            if np.mean(original_flags[ap_str][~solar_flags]) <= OVERALL_MAX_FLAG_FRAC:
                for aps, category in fh.history.keys():
                    if ap_str == aps:
                        previous_flags, rtp_flags, new_flags = fh.history[(aps, category)]
                        # if new flags were added for this reason
                        if np.sum(new_flags) > np.sum(rtp_flags):
                            plt.figure(figsize=(14, 3), dpi=100)
                            
                            # plot 
                            plt.scatter(frac_jds, metric_data[category][ap_str], c=class_to_int(class_data[category][ap_str]), s=3,
                                        vmin=0, vmax=1.7, cmap='RdYlGn_r', label='Metric/Classification')
                            plt.plot(frac_jds, convolve(metric_data[category][ap_str], kernel, mode='reflect'), 'k--', label='Smoothed Metric')
                            plt.ylabel(category)
                            plt.xlabel(f'JD - {int(np.floor(jds[0]))}')
                            plt.xlim([np.min(frac_jds) - 10 * djd, 1.2 * np.max(frac_jds) - .2 * np.min(frac_jds)])
                            
                            # Indicate flagged stretches 
                            for i, bad_stretch in enumerate(true_stretches(rtp_flags)):
                                plt.axvspan(frac_jds[bad_stretch.start] - djd / 2, frac_jds[bad_stretch.stop - 1] + djd / 2, zorder=0, color='red', alpha=.75, lw=0,
                                            label=(f'RTP Flags:\n{np.mean(rtp_flags[~solar_flags]):.2%} of night' if i == 0 else None))
                            for i, bad_stretch in enumerate(true_stretches(new_flags)):
                                plt.axvspan(frac_jds[bad_stretch.start] - djd / 2, frac_jds[bad_stretch.stop - 1] + djd / 2, zorder=0, color='orange', alpha=.75, lw=0,
                                            label=(f'Harmonized Flags:\n{np.mean(new_flags[~solar_flags]):.2%} of night' if i == 0 else None))
                            for i, bad_stretch in enumerate(true_stretches(fh.final_flags[ap_str] & ~new_flags)):
                                plt.axvspan(frac_jds[bad_stretch.start] - djd / 2, frac_jds[bad_stretch.stop - 1] + djd / 2, zorder=0, color='purple', alpha=.75, lw=0,
                                            label=(f'Other Final Flags:\n{np.mean(fh.final_flags[ap_str][~solar_flags]):.2%} of night' if i == 0 else None))                            

                            plt.legend(title=f'{ap_str}: {category}', loc='upper right')
                            plt.tight_layout()
                            plt.show()

Figure 4: Per-Antenna Flag Harmonization Summary¶

This figure shows antennas that had their flags non-trivially modified by this notebook and tries to show the underlying rationale for why that happened. Sometimes the flag harmonizaton performed here leads to the whole antenna getting flagged; sometimes it just leads to large chunks of the night getting flagged.

In [23]:
per_antenna_flag_harmonization_plots()

Save results¶

In [24]:
add_to_history = 'Produced by full_day_antenna_flagging notebook with the following environment:\n' + '=' * 65 + '\n' + os.popen('conda env export').read() + '=' * 65
In [25]:
out_flag_files = [cal.replace(CAL_SUFFIX, OUT_FLAG_SUFFIX) for cal in cal_files]
for i, (cal, out_flag_file) in enumerate(zip(cal_files, out_flag_files)):
    # create UVFlag object based on UVCal
    uvc = UVCal()
    uvc.read_calfits(cal)
    uvf = UVFlag(uvc, mode='flag')
    uvf.use_future_array_shapes()
    
    # fill with flags
    for ant_ind, antnum in enumerate(uvf.ant_array):
        for pol_ind, polnum in enumerate(uvf.polarization_array):
            pol = {'Jee': 'e', 'Jnn': 'n'}[utils.jnum2str(polnum, x_orientation=uvf.x_orientation)]
            uvf.flag_array[ant_ind, :, :, pol_ind] = fh.final_flags[f'{antnum}{pol}'][i]

    # write to disk
    uvf.history += add_to_history        
    uvf.write(out_flag_file, clobber=True)
invalid value encountered in multiply

Metadata¶

In [26]:
for repo in ['hera_cal', 'hera_qm', 'hera_filters', 'hera_notebook_templates', 'pyuvdata']:
    exec(f'from {repo} import __version__')
    print(f'{repo}: {__version__}')
hera_cal: 3.2.3.dev158+gd5cadd5
hera_qm: 2.1.1.dev3+gb291d34
hera_filters: 0.1.0
hera_notebook_templates: 0.1.dev486+gfb8560a
pyuvdata: 2.3.0
In [27]:
print(f'Finished execution in {(time.time() - tstart) / 60:.2f} minutes.')
Finished execution in 1.50 minutes.