Antenna Classification Daily Summary¶
by Josh Dillon last updated June 19, 2023
This notebook parses and summarizes the output of the file_calibration
notebook to produce a report on per-antenna malfunctions on a daily basis.
Quick links:
• Summary of Per-Antenna Issues
• Figure 1: Per File Overall Antenna Classification Summary
• Figure 2: Per Classifier Antenna Flagging Summary
• Figure 3: Array Visualization of Overall Daily Classification
In [1]:
import os
os.environ['HDF5_USE_FILE_LOCKING'] = 'FALSE'
import h5py
import hdf5plugin # REQUIRED to have the compression plugins available
import numpy as np
import pandas as pd
import glob
import os
import matplotlib.pyplot as plt
from hera_cal import io, utils
from hera_qm import ant_class
from uvtools.plot import plot_antpos, plot_antclass
%matplotlib inline
from IPython.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))
_ = np.seterr(all='ignore') # get rid of red warnings
%config InlineBackend.figure_format = 'retina'
Settings¶
In [2]:
# Parse settings from environment variables, falling back to defaults when unset.
ANT_CLASS_FOLDER = os.environ.get("ANT_CLASS_FOLDER", "./")
SUM_FILE = os.environ.get("SUM_FILE", None)
# Example values for running this notebook by hand:
# ANT_CLASS_FOLDER = "/mnt/sn1/2460330"
# SUM_FILE = "/mnt/sn1/2460330/zen.2460330.25463.sum.uvh5"
# Only the string "TRUE" (case-insensitive) enables this flag; anything else disables it.
OC_SKIP_OUTRIGGERS = os.environ.get("OC_SKIP_OUTRIGGERS", "TRUE").upper() == "TRUE"

# Echo the resolved settings. Values are paired with their names directly,
# so no eval() of strings is needed.
for param, value in [('ANT_CLASS_FOLDER', ANT_CLASS_FOLDER),
                     ('SUM_FILE', SUM_FILE),
                     ('OC_SKIP_OUTRIGGERS', OC_SKIP_OUTRIGGERS)]:
    print(f"{param} = '{value}'")
ANT_CLASS_FOLDER = '/mnt/sn1/data1/2460489' SUM_FILE = '/mnt/sn1/data1/2460489/zen.2460489.34455.sum.uvh5' OC_SKIP_OUTRIGGERS = 'True'
In [3]:
if SUM_FILE is not None:
    from astropy.time import Time, TimeDelta
    # The Julian date is the text between 'zen.' and '.sum.uvh5' in the file name.
    jd_str = SUM_FILE.split('zen.')[-1].split('.sum.uvh5')[0]
    utc = Time(float(jd_str), format='jd').datetime
    print(f'Date: {utc.month}-{utc.day}-{utc.year}')
Date: 6-27-2024
In [4]:
# Thresholds on the fraction of the day's files in which an antpol must show
# a given problem before it is reported for that problem.
overall_thresh = 0.1
all_zero_thresh = 0.1
eo_zeros_thresh = 0.1
xengine_diff_thresh = 0.1
cross_pol_thresh = 0.5
bad_fem_thresh = 0.1
high_power_thresh = 0.1
low_power_thresh = 0.1
low_corr_thresh = 0.1
bad_shape_thresh = 0.5
excess_rfi_thresh = 0.1
chisq_thresh = 0.25
Load classifications and other metadata¶
In [5]:
# Load the per-file antenna classification csvs
csv_files = sorted(glob.glob(os.path.join(ANT_CLASS_FOLDER, '*.ant_class.csv')))
if len(csv_files) == 0:
    # Fail with a clear message instead of an IndexError on tables[0] below.
    raise FileNotFoundError(f'No *.ant_class.csv files found in {ANT_CLASS_FOLDER}')
# The Julian date is the text between 'zen.' and '.sum' in each file's base name.
jds = [float(os.path.basename(f).split('zen.')[-1].split('.sum')[0]) for f in csv_files]
# Rows that are entirely empty are dropped from each table.
tables = [pd.read_csv(f).dropna(axis=0, how='all') for f in csv_files]
# Every other column starting at index 1 and at index 2, respectively.
# NOTE(review): presumably value columns alternate with their "... Class" columns — confirm against the csv layout.
table_cols = tables[0].columns[1::2]
class_cols = tables[0].columns[2::2]
print(f'Found {len(csv_files)} csv files starting with {csv_files[0]}')
Found 1571 csv files starting with /mnt/sn1/data1/2460489/zen.2460489.16872.sum.ant_class.csv
In [6]:
# Antenna-polarization strings come from the first table; the trailing
# character is stripped to get the integer antenna number.
ap_strs = np.array(tables[0]['Antenna'])
ants = sorted({int(ap_str[:-1]) for ap_str in ap_strs})
# Translation table that swaps 'e' <-> 'n', used to find an antpol's other polarization.
translator = str.maketrans('en', 'ne')
In [7]:
# Get node numbers. Every antenna starts as 'Unknown' and is overwritten only
# if the hookup lookup succeeds for it.
node_dict = {ant: 'Unknown' for ant in ants}
try:
    # hera_mc is imported here so the notebook still runs where it is unavailable.
    from hera_mc import cm_hookup
    hookup = cm_hookup.get_hookup('default')
    for ant_name in hookup:
        # antenna number = all digits in the hookup key
        ant = int("".join(filter(str.isdigit, ant_name)))
        if ant in node_dict:
            node_part = hookup[ant_name].get_part_from_type('node')['E<ground']
            if node_part is not None:
                # drop the leading character of the part name to get the node number
                node_dict[ant] = int(node_part[1:])
except Exception as err:
    # Best-effort lookup: report the failure instead of hiding it, but keep going.
    # (A bare `except:` would also swallow KeyboardInterrupt.)
    print(f'Could not look up node numbers ({type(err).__name__}: {err}); '
          'affected antennas are listed under node Unknown.')
# Numeric nodes sort first, then 'Unknown'; a plain sorted() raises TypeError
# when ints and strings are mixed.
nodes = sorted(set(node_dict.values()), key=lambda node: (isinstance(node, str), node))
In [8]:
def classification_array(col):
    '''Stack column `col` from every table in `tables` (one row per table) and
    convert the class labels to floats: 'good' -> 1.7, 'suspect' -> 1, 'bad' -> 0.'''
    stacked = np.vstack([table[col] for table in tables])
    for label, score in (('good', 1.7), ('suspect', 1), ('bad', 0)):
        stacked[stacked == label] = score
    return stacked.astype(float)
In [9]:
if SUM_FILE is not None:
    hd = io.HERADataFastReader(SUM_FILE)

    # Convert antpol strings to (antenna number, 'Jee'/'Jnn') tuples.
    pol_names = {'e': 'Jee', 'n': 'Jnn'}
    ap_tuples = [(int(ap[:-1]), pol_names[ap[-1]]) for ap in ap_strs]

    # An antpol is bad (or suspect) for the day when the fraction of files in
    # which it is classified that way exceeds overall_thresh; bad wins over suspect.
    overall_classes = classification_array('Antenna Class')
    bad_bools = np.mean(overall_classes == 0, axis=0) > overall_thresh
    bad_aps = [ap_tuples[i] for i in np.flatnonzero(bad_bools)]
    suspect_bools = np.mean(overall_classes == 1, axis=0) > overall_thresh
    suspect_aps = [ap_tuples[i] for i in np.flatnonzero(suspect_bools) if ap_tuples[i] not in bad_aps]
    good_aps = [ap for ap in ap_tuples if not (ap in bad_aps or ap in suspect_aps)]
    overall_class = ant_class.AntennaClassification(bad=bad_aps, suspect=suspect_aps, good=good_aps)

    # Read only baselines whose two split halves are equal (the autocorrelations).
    auto_bls = [bl for bl in hd.bls if utils.split_bl(bl)[0] == utils.split_bl(bl)[1]]
    autos, _, _ = hd.read(bls=auto_bls, read_flags=False, read_nsamples=False)

    # Per-polarization average over all autos not classified as bad;
    # falls back to zeros when no such auto exists.
    avg_unflagged_auto = {}
    for pol in ['ee', 'nn']:
        unflagged_autos = [autos[bl] for bl in autos if bl[2] == pol and overall_class[utils.split_bl(bl)[0]] != 'bad']
        if len(unflagged_autos) == 0:
            avg_unflagged_auto[pol] = np.zeros(len(hd.freqs), dtype=complex)
        else:
            avg_unflagged_auto[pol] = np.mean(unflagged_autos, axis=(0, 1))
Figure out and summarize per-antenna issues¶
In [10]:
def _plot_node_autos(aps, title, node):
    '''Plot the single-file autocorrelation of each antpol in aps against the
    average unflagged auto, with one panel per polarization.'''
    fig, axes = plt.subplots(1, 2, figsize=(12,4), dpi=70, sharey=True, gridspec_kw={'wspace': 0})
    for ax, pol in zip(axes, ['ee', 'nn']):
        ax.semilogy(autos.freqs / 1e6, avg_unflagged_auto[pol], 'k--', label='Average\nUnflagged\nAuto')
        for ap in aps:
            ant = int(ap[:-1]), utils.comply_pol(ap[-1])
            auto_bl = utils.join_bl(ant, ant)
            # only draw antpols whose polarization matches this panel
            if auto_bl[2] == pol:
                ax.semilogy(autos.freqs / 1e6, np.mean(autos[auto_bl], axis=0), label=ap)
        ax.legend()
        ax.set_xlim([40, 299])
        ax.set_title(f'{title} on Node {node} ({pol}-antennas)')
        ax.set_xlabel('Frequency (MHz)')
    axes[0].set_ylabel('Single File Raw Autocorrelation')
    plt.tight_layout()
    plt.show()


def print_issue_summary(bad_ant_strs, title, notes='', plot=False):
    '''Print a per-node report for a list of bad antenna-polarization strings.

    Arguments:
        bad_ant_strs: sequence of antpol strings such as '12e' or '12n'
            (antenna number followed by one polarization character).
        title: heading displayed for this issue.
        notes: optional explanatory text displayed under the heading.
        plot: if True and SUM_FILE is set, also plot the autocorrelations of
            the affected antpols for each node.

    For every node (from the global `nodes`) with at least one affected antpol,
    prints the antpols, the antennas with both polarizations affected
    ("Whole Ants", in ascending numerical order), and the antpols whose other
    polarization is not affected ("Single Pols").
    '''
    # set for O(1) membership tests when looking up the opposite polarization
    bad_set = set(bad_ant_strs)
    bad_antnums = {int(ap[:-1]) for ap in bad_ant_strs}
    display(HTML(f'<h2>{title}: ({len(bad_ant_strs)} antpols across {len(bad_antnums)} antennas)</h2>'))
    if len(notes) > 0:
        display(HTML(f'<h4>{notes}</h4>'))
    if len(bad_ant_strs) > 0:
        print(f'All Bad Antpols: {", ".join(bad_ant_strs)}\n')

    for node in nodes:
        aps = [ap for ap in bad_ant_strs if node_dict[int(ap[:-1])] == node]
        if len(aps) == 0:
            continue
        # translator swaps 'e' <-> 'n', giving the other polarization of the same antenna
        whole_antnums = sorted({int(ap[:-1]) for ap in aps if ap.translate(translator) in bad_set})
        whole_ants = [str(antnum) for antnum in whole_antnums]
        single_pols = [ap for ap in aps if ap.translate(translator) not in bad_set]
        print(f'Node {node}:')
        print(f'\tAntpols ({len(aps)} total): {", ".join(aps)}')
        print(f'\tWhole Ants ({len(whole_ants)} total): {", ".join(whole_ants)}')
        print(f'\tSingle Pols ({len(single_pols)} total): {", ".join(single_pols)}')
        if plot and SUM_FILE is not None:
            _plot_node_autos(aps, title, node)
In [11]:
# Precompute stacked per-file arrays (one row per table) used by the issue finders below.
def _stack_column(col):
    '''Stack one column from every table in `tables` into a 2D array.'''
    return np.vstack([t[col] for t in tables])

# autocorrelation slope: values, overall median, and class masks
all_slopes = _stack_column('Autocorr Slope')
median_slope = np.median(all_slopes)
slope_classes = _stack_column('Autocorr Slope Class')
bad_slopes = slope_classes == 'bad'
suspect_slopes = slope_classes == 'suspect'

# autocorrelation shape: class masks
shape_classes = _stack_column('Autocorr Shape Class')
bad_shapes = shape_classes == 'bad'
suspect_shapes = shape_classes == 'suspect'

# autocorrelation power: values, overall median, and class masks
all_powers = _stack_column('Autocorr Power')
median_power = np.median(all_powers)
power_classes = _stack_column('Autocorr Power Class')
bad_powers = power_classes == 'bad'
suspect_powers = power_classes == 'suspect'

# auto RFI RMS: class masks
rfi_classes = _stack_column('Auto RFI RMS Class')
bad_rfi = rfi_classes == 'bad'
suspect_rfi = rfi_classes == 'suspect'
In [12]:
# Antpols whose 'Dead? Class' is bad in more than all_zero_thresh of the files.
dead_classes = np.vstack([t['Dead? Class'] for t in tables])
all_zeros_strs = ap_strs[np.mean(dead_classes == 'bad', axis=0) > all_zero_thresh]
In [13]:
# Antpols with bad 'Even/Odd Zeros Class' in more than eo_zeros_thresh of the
# files, excluding those already reported as all-zeros.
eo_zeros_classes = np.vstack([t['Even/Odd Zeros Class'] for t in tables])
eo_zeros_bad_frac = np.mean(eo_zeros_classes == 'bad', axis=0)
eo_zeros_strs = [ap for ap in ap_strs[eo_zeros_bad_frac > eo_zeros_thresh] if ap not in all_zeros_strs]
In [14]:
# Antpols with bad 'Cross-Polarized Class' in more than cross_pol_thresh of the
# files, excluding those already reported as all-zeros.
cross_pol_classes = np.vstack([t['Cross-Polarized Class'] for t in tables])
cross_pol_bad_frac = np.mean(cross_pol_classes == 'bad', axis=0)
cross_pol_strs = [ap for ap in ap_strs[cross_pol_bad_frac > cross_pol_thresh] if ap not in all_zeros_strs]
In [15]:
# find FEM power issues: must be low power, high slope, and bad or suspect in power, slope, rfi, and shape
# Each factor scores 1 for bad, .5 for suspect, and 0 otherwise, so the product
# is nonzero only when all four metrics are at least suspect.
fem_off_prod = (bad_powers + .5 * suspect_powers) * (bad_slopes + .5 * suspect_slopes)
fem_off_prod *= (bad_rfi + .5 * suspect_rfi) * (bad_shapes + .5 * suspect_shapes)
# Use the configured bad_fem_thresh rather than a hard-coded .1 so the
# threshold set with the other thresholds actually takes effect.
fem_off_strs = ap_strs[np.mean(fem_off_prod * (all_powers < median_power) * (all_slopes > median_slope), axis=0) > bad_fem_thresh]
In [16]:
# Antpols whose power class is bad while their power is above the overall
# median, in more than high_power_thresh of the files.
is_high_power = bad_powers & (all_powers > median_power)
high_power_strs = ap_strs[np.mean(is_high_power, axis=0) > high_power_thresh]
In [17]:
# Antpols whose power class is bad while their power is below the overall
# median, in more than low_power_thresh of the files; all-zeros and FEM-off
# antpols are reported separately and excluded here.
low_power_bad_frac = np.mean(bad_powers & (all_powers < median_power), axis=0)
low_power_strs = [ap for ap in ap_strs[low_power_bad_frac > low_power_thresh]
                  if not (ap in all_zeros_strs or ap in fem_off_strs)]
In [18]:
# Antpols with bad 'Low Correlation Class' in more than low_corr_thresh of the
# files, excluding low-power, all-zeros, and FEM-off antpols.
low_corr_classes = np.vstack([t['Low Correlation Class'] for t in tables])
low_corr_bad_frac = np.mean(low_corr_classes == 'bad', axis=0)
low_corr_exclusions = set().union(low_power_strs, all_zeros_strs, fem_off_strs)
low_corr_strs = [ap for ap in ap_strs[low_corr_bad_frac > low_corr_thresh] if ap not in low_corr_exclusions]
In [19]:
# Antpols with a bad autocorrelation shape in more than bad_shape_thresh of the
# files, excluding low-power, all-zeros, high-power, and FEM-off antpols.
bad_shape_frac = np.mean(bad_shapes, axis=0)
bad_bandpass_exclusions = set().union(low_power_strs, all_zeros_strs, high_power_strs, fem_off_strs)
bad_bandpass_strs = [ap for ap in ap_strs[bad_shape_frac > bad_shape_thresh] if ap not in bad_bandpass_exclusions]
In [20]:
# find antennas with excess RFI
# bad_rfi is the precomputed ('Auto RFI RMS Class' == 'bad') mask, reused here
# the same way the bad-bandpass cell reuses bad_shapes, instead of re-stacking the column.
excess_rfi_strs = ap_strs[np.mean(bad_rfi, axis=0) > excess_rfi_thresh]
# Drop antpols already reported under another power or bandpass issue;
# the exclusion set is built once rather than once per antpol.
excess_rfi_exclusions = (set(low_power_strs) | set(all_zeros_strs) | set(fem_off_strs) |
                         set(bad_bandpass_strs) | set(high_power_strs))
excess_rfi_strs = [ap for ap in excess_rfi_strs if ap not in excess_rfi_exclusions]
In [21]:
# Antpols with bad 'Bad Diff X-Engines Class' in more than xengine_diff_thresh
# of the files, excluding antpols already reported under any issue found above
# other than cross-polarization.
xengine_classes = np.vstack([t['Bad Diff X-Engines Class'] for t in tables])
xengine_bad_frac = np.mean(xengine_classes == 'bad', axis=0)
xengine_exclusions = set().union(bad_bandpass_strs, low_power_strs, excess_rfi_strs, low_corr_strs,
                                 all_zeros_strs, high_power_strs, fem_off_strs, eo_zeros_strs)
xengine_diff_strs = [ap for ap in ap_strs[xengine_bad_frac > xengine_diff_thresh] if ap not in xengine_exclusions]
In [22]:
# Antpols with bad 'Redcal chi^2 Class' in more than chisq_thresh of the files,
# excluding antpols already reported under any issue found above other than
# cross-polarization.
chisq_classes = np.vstack([t['Redcal chi^2 Class'] for t in tables])
chisq_bad_frac = np.mean(chisq_classes == 'bad', axis=0)
chisq_exclusions = set().union(bad_bandpass_strs, low_power_strs, excess_rfi_strs, low_corr_strs,
                               all_zeros_strs, high_power_strs, fem_off_strs, eo_zeros_strs, xengine_diff_strs)
chisq_strs = [ap for ap in ap_strs[chisq_bad_frac > chisq_thresh] if ap not in chisq_exclusions]
if OC_SKIP_OUTRIGGERS:
    # keep only antenna numbers below 320
    # NOTE(review): presumably numbers >= 320 are the outriggers — confirm.
    chisq_strs = [ap for ap in chisq_strs if int(ap[:-1]) < 320]
In [23]:
# Collect all results. Each entry is (antpol strings, title, notes[, plot]) and is
# unpacked as the positional arguments of print_issue_summary; entries without
# a fourth element use that function's default of plot=False.
to_print = [(all_zeros_strs, 'All-Zeros', 'These antennas have visibilities that are more than half zeros.'),
            (eo_zeros_strs, 'Excess Zeros in Either Even or Odd Spectra',
             'These antennas are showing evidence of packet loss or X-engine failure.', True),
            (xengine_diff_strs, 'Excess Power in X-Engine Diffs',
             'These antennas are showing evidence of mis-written packets in either the evens or the odds.', True),
            (cross_pol_strs, 'Cross-Polarized', 'These antennas have their east and north cables swapped.'),
            (fem_off_strs, 'Likely FEM Power Issue', 'These antennas have low power and anomalously high slopes.', True),
            (high_power_strs, 'High Power', 'These antennas have high median power.', True),
            (low_power_strs, 'Other Low Power Issues', 'These antennas have low power, but are not all-zeros and not FEM off.', True),
            (low_corr_strs, 'Low Correlation, But Not Low Power', 'These antennas are low correlation, but their autocorrelation power levels look OK.'),
            (bad_bandpass_strs, 'Bad Bandpass Shapes, But Not Bad Power',
             'These antennas have unusual bandpass shapes, but are not all-zeros, high power, low power, or FEM off.', True),
            (excess_rfi_strs, 'Excess RFI', 'These antennas have excess RMS after DPSS filtering (likely RFI), but not low or high power or a bad bandpass.', True),
            (chisq_strs, 'Redcal chi^2', 'These antennas have been identified as not redundantly calibrating well, even after passing the above checks.')]
In [24]:
def print_high_level_summary():
    '''Print one line per issue in to_print, ordered from most to fewest flagged
    antpols, giving the number of antpols and of distinct antennas affected.'''
    ranked = sorted(to_print, key=lambda entry: len(entry[0]), reverse=True)
    for ap_list, issue_name, *_ in ranked:
        n_ants = len({ap[:-1] for ap in ap_list})
        print(f'{len(ap_list)} antpols (on {n_ants} antennas) frequently flagged for {issue_name}.')
def print_all_issue_summaries():
    '''Call print_issue_summary once for each entry of to_print, in list order,
    passing the entry's elements as positional arguments.'''
    for ap_list, issue_name, *extra_args in to_print:
        print_issue_summary(ap_list, issue_name, *extra_args)
Summary of Per-Antenna Issues¶
In [25]:
print_high_level_summary()
50 antpols (on 25 antennas) frequently flagged for All-Zeros. 24 antpols (on 19 antennas) frequently flagged for High Power. 21 antpols (on 19 antennas) frequently flagged for Excess RFI. 17 antpols (on 12 antennas) frequently flagged for Redcal chi^2. 16 antpols (on 14 antennas) frequently flagged for Likely FEM Power Issue. 12 antpols (on 8 antennas) frequently flagged for Low Correlation, But Not Low Power. 7 antpols (on 7 antennas) frequently flagged for Bad Bandpass Shapes, But Not Bad Power. 6 antpols (on 3 antennas) frequently flagged for Cross-Polarized. 5 antpols (on 4 antennas) frequently flagged for Excess Power in X-Engine Diffs. 1 antpols (on 1 antennas) frequently flagged for Other Low Power Issues. 0 antpols (on 0 antennas) frequently flagged for Excess Zeros in Either Even or Odd Spectra.
In [26]:
print_all_issue_summaries()
All-Zeros: (50 antpols across 25 antennas)
These antennas have visibilities that are more than half zeros.
All Bad Antpols: 47e, 47n, 61e, 61n, 63e, 63n, 64e, 64n, 77e, 77n, 78e, 78n, 88e, 88n, 90e, 90n, 107e, 107n, 176e, 176n, 177e, 177n, 178e, 178n, 225e, 225n, 226e, 226n, 227e, 227n, 228e, 228n, 229e, 229n, 240e, 240n, 241e, 241n, 242e, 242n, 243e, 243n, 251e, 251n, 272e, 272n, 281e, 281n, 295e, 295n Node 6: Antpols (12 total): 47e, 47n, 61e, 61n, 63e, 63n, 64e, 64n, 77e, 77n, 78e, 78n Whole Ants (6 total): 64, 77, 78, 47, 61, 63 Single Pols (0 total): Node 9: Antpols (6 total): 88e, 88n, 90e, 90n, 107e, 107n Whole Ants (3 total): 88, 90, 107 Single Pols (0 total): Node 12: Antpols (6 total): 176e, 176n, 177e, 177n, 178e, 178n Whole Ants (3 total): 176, 177, 178 Single Pols (0 total): Node 19: Antpols (12 total): 225e, 225n, 226e, 226n, 240e, 240n, 241e, 241n, 242e, 242n, 243e, 243n Whole Ants (6 total): 225, 226, 240, 241, 242, 243 Single Pols (0 total): Node 20: Antpols (6 total): 227e, 227n, 228e, 228n, 229e, 229n Whole Ants (3 total): 227, 228, 229 Single Pols (0 total): Node 22: Antpols (6 total): 251e, 251n, 281e, 281n, 295e, 295n Whole Ants (3 total): 281, 251, 295 Single Pols (0 total): Node 23: Antpols (2 total): 272e, 272n Whole Ants (1 total): 272 Single Pols (0 total):
Excess Zeros in Either Even or Odd Spectra: (0 antpols across 0 antennas)
These antennas are showing evidence of packet loss or X-engine failure.
Excess Power in X-Engine Diffs: (5 antpols across 4 antennas)
These antennas are showing evidence of mis-written packets in either the evens or the odds.
All Bad Antpols: 82n, 130e, 130n, 266e, 340n Node 7: Antpols (1 total): 82n Whole Ants (0 total): Single Pols (1 total): 82n
Node 10: Antpols (2 total): 130e, 130n Whole Ants (1 total): 130 Single Pols (0 total):
Node 21: Antpols (1 total): 340n Whole Ants (0 total): Single Pols (1 total): 340n
Node 22: Antpols (1 total): 266e Whole Ants (0 total): Single Pols (1 total): 266e
Cross-Polarized: (6 antpols across 3 antennas)
These antennas have their east and north cables swapped.
All Bad Antpols: 86e, 86n, 196e, 196n, 255e, 255n Node 8: Antpols (2 total): 86e, 86n Whole Ants (1 total): 86 Single Pols (0 total): Node 17: Antpols (2 total): 196e, 196n Whole Ants (1 total): 196 Single Pols (0 total): Node 23: Antpols (2 total): 255e, 255n Whole Ants (1 total): 255 Single Pols (0 total):
Likely FEM Power Issue: (16 antpols across 14 antennas)
These antennas have low power and anomolously high slopes.
All Bad Antpols: 3n, 16e, 16n, 27e, 34e, 46n, 86n, 96n, 104n, 109n, 170e, 171n, 200e, 218n, 332e, 332n Node 1: Antpols (4 total): 3n, 16e, 16n, 27e Whole Ants (1 total): 16 Single Pols (2 total): 3n, 27e
Node 5: Antpols (1 total): 46n Whole Ants (0 total): Single Pols (1 total): 46n
Node 6: Antpols (1 total): 34e Whole Ants (0 total): Single Pols (1 total): 34e