by Josh Dillon last updated June 19, 2023
This notebook parses and summarizes the output of the file_calibration
notebook to produce a report on per-antenna malfunctions on a daily basis.
Quick links:
import numpy as np
import pandas as pd
import glob
import os
import matplotlib.pyplot as plt
from hera_cal import io, utils
from hera_qm import ant_class
from uvtools.plot import plot_antpos, plot_antclass
%matplotlib inline
from IPython.display import display, HTML
# Parse settings from environment
ANT_CLASS_FOLDER = os.environ.get("ANT_CLASS_FOLDER", "./")
SUM_FILE = os.environ.get("SUM_FILE", None)
# ANT_CLASS_FOLDER = '/lustre/aoc/projects/hera/h6c-analysis/IDR2/2459866'
# SUM_FILE = '/lustre/aoc/projects/hera/h6c-analysis/IDR2/2459866/zen.2459866.25292.sum.uvh5'
OC_SKIP_OUTRIGGERS = os.environ.get("OC_SKIP_OUTRIGGERS", "TRUE").upper() == "TRUE"
for param in ['ANT_CLASS_FOLDER', 'SUM_FILE', 'OC_SKIP_OUTRIGGERS']:
print(f"{param} = '{eval(param)}'")
ANT_CLASS_FOLDER = '/mnt/sn1/2460217' SUM_FILE = '/mnt/sn1/2460217/zen.2460217.45543.sum.uvh5' OC_SKIP_OUTRIGGERS = 'True'
if SUM_FILE is not None:
from astropy.time import Time, TimeDelta
utc = Time(float(SUM_FILE.split('zen.')[-1].split('.sum.uvh5')[0]), format='jd').datetime
print(f'Date: {utc.month}-{utc.day}-{utc.year}')
Date: 9-29-2023
# set thresholds for fraction of the day
overall_thresh = .1
all_zero_thresh = .1
eo_zeros_thresh = .1
xengine_diff_thresh = .1
cross_pol_thresh = .5
bad_fem_thresh = .1
high_power_thresh = .1
low_power_thresh = .1
low_corr_thresh = .1
bad_shape_thresh = .5
excess_rfi_thresh = .1
chisq_thresh = .25
# Load csvs
csv_files = sorted(glob.glob(os.path.join(ANT_CLASS_FOLDER, '*.ant_class.csv')))
jds = [float(f.split('/')[-1].split('zen.')[-1].split('.sum')[0]) for f in csv_files]
tables = [pd.read_csv(f).dropna(axis=0, how='all') for f in csv_files]
table_cols = tables[0].columns[1::2]
class_cols = tables[0].columns[2::2]
print(f'Found {len(csv_files)} csv files starting with {csv_files[0]}')
Found 1905 csv files starting with /mnt/sn1/2460217/zen.2460217.23760.sum.ant_class.csv
# parse ant_strings
ap_strs = np.array(tables[0]['Antenna'])
ants = sorted(set(int(a[:-1]) for a in ap_strs))
translator = ''.maketrans('e', 'n') | ''.maketrans('n', 'e')
# get node numbers
node_dict = {ant: 'Unknown' for ant in ants}
try:
from hera_mc import cm_hookup
hookup = cm_hookup.get_hookup('default')
for ant_name in hookup:
ant = int("".join(filter(str.isdigit, ant_name)))
if ant in node_dict:
if hookup[ant_name].get_part_from_type('node')['E<ground'] is not None:
node_dict[ant] = int(hookup[ant_name].get_part_from_type('node')['E<ground'][1:])
except:
pass
nodes = sorted(set(node_dict.values()))
def classification_array(col):
class_array = np.vstack([t[col] for t in tables])
class_array[class_array == 'good'] = 1.7
class_array[class_array == 'suspect'] = 1
class_array[class_array == 'bad'] = 0
return class_array.astype(float)
if SUM_FILE is not None:
hd = io.HERADataFastReader(SUM_FILE)
ap_tuples = [(int(ap[:-1]), {'e': 'Jee', 'n': 'Jnn'}[ap[-1]]) for ap in ap_strs]
bad_bools = np.mean(classification_array('Antenna Class') == 0, axis=0) > overall_thresh
bad_aps = [ap_tuples[i] for i in np.arange(len(ap_tuples))[bad_bools]]
suspect_bools = np.mean(classification_array('Antenna Class') == 1, axis=0) > overall_thresh
suspect_aps = [ap_tuples[i] for i in np.arange(len(ap_tuples))[suspect_bools] if ap_tuples[i] not in bad_aps]
good_aps = [ap for ap in ap_tuples if ap not in bad_aps and ap not in suspect_aps]
overall_class = ant_class.AntennaClassification(bad=bad_aps, suspect=suspect_aps, good=good_aps)
autos, _, _ = hd.read(bls=[bl for bl in hd.bls if utils.split_bl(bl)[0] == utils.split_bl(bl)[1]], read_flags=False, read_nsamples=False)
avg_unflagged_auto = {pol: np.mean([autos[bl] for bl in autos if bl[2] == pol and overall_class[utils.split_bl(bl)[0]] != 'bad'], axis=(0, 1)) for pol in ['ee', 'nn']}
def print_issue_summary(bad_ant_strs, title, notes='', plot=False):
'''Print report for list of bad antenna polarizations strings'''
unique_bad_antnums = [int(ap[:-1]) for ap in bad_ant_strs]
display(HTML(f'<h2>{title}: ({len(bad_ant_strs)} antpols across {len(set([ba[:-1] for ba in bad_ant_strs]))} antennas)</h2>'))
if len(notes) > 0:
display(HTML(f'<h4>{notes}</h4>'))
if len(bad_ant_strs) > 0:
print(f'All Bad Antpols: {", ".join(bad_ant_strs)}\n')
for node in nodes:
if np.any([node == node_dict[a] for a in unique_bad_antnums]):
aps = [ap for ap in bad_ant_strs if node_dict[int(ap[:-1])] == node]
whole_ants = [str(wa) for wa in set([int(ap[:-1]) for ap in aps if ap.translate(translator) in bad_ant_strs])]
single_pols = [ap for ap in aps if ap.translate(translator) not in bad_ant_strs]
print(f'Node {node}:')
print(f'\tAntpols ({len(aps)} total): {", ".join(aps)}')
print(f'\tWhole Ants ({len(whole_ants)} total): {", ".join(whole_ants)}')
print(f'\tSingle Pols ({len(single_pols)} total): {", ".join(single_pols)}')
if plot and SUM_FILE is not None:
fig, axes = plt.subplots(1, 2, figsize=(12,4), dpi=70, sharey=True, gridspec_kw={'wspace': 0})
for ax, pol in zip(axes, ['ee', 'nn']):
ax.semilogy(autos.freqs / 1e6, avg_unflagged_auto[pol], 'k--', label='Average\nUnflagged\nAuto')
for ap in aps:
ant = int(ap[:-1]), utils.comply_pol(ap[-1])
auto_bl = utils.join_bl(ant, ant)
if auto_bl[2] == pol:
ax.semilogy(autos.freqs / 1e6, np.mean(autos[auto_bl], axis=0), label=ap)
ax.legend()
ax.set_xlim([40, 299])
ax.set_title(f'{title} on Node {node} ({pol}-antennas)')
ax.set_xlabel('Frequency (MHz)')
axes[0].set_ylabel('Single File Raw Autocorrelation')
plt.tight_layout()
plt.show()
# precompute various helpful quantities
all_slopes = np.vstack([t['Autocorr Slope'] for t in tables])
median_slope = np.median(all_slopes)
bad_slopes = np.vstack([t['Autocorr Slope Class'] for t in tables]) == 'bad'
suspect_slopes = np.vstack([t['Autocorr Slope Class'] for t in tables]) == 'suspect'
bad_shapes = np.vstack([t['Autocorr Shape Class'] for t in tables]) == 'bad'
suspect_shapes = np.vstack([t['Autocorr Shape Class'] for t in tables]) == 'suspect'
all_powers = np.vstack([t['Autocorr Power'] for t in tables])
median_power = np.median(all_powers)
bad_powers = np.vstack([t['Autocorr Power Class'] for t in tables]) == 'bad'
suspect_powers = np.vstack([t['Autocorr Power Class'] for t in tables]) == 'suspect'
bad_rfi = np.vstack([t['RFI in Autos Class'] for t in tables]) == 'bad'
suspect_rfi = np.vstack([t['RFI in Autos Class'] for t in tables]) == 'suspect'
# find all zeros
all_zeros_strs = ap_strs[np.mean(np.vstack([t['Dead? Class'] for t in tables]) == 'bad', axis=0) > all_zero_thresh]
# find even/odd zeros
eo_zeros_strs = ap_strs[np.mean(np.vstack([t['Even/Odd Zeros Class'] for t in tables]) == 'bad', axis=0) > eo_zeros_thresh]
eo_zeros_strs = [ap for ap in eo_zeros_strs if ap not in all_zeros_strs]
# find cross-polarized antennas
cross_pol_strs = ap_strs[np.mean(np.vstack([t['Cross-Polarized Class'] for t in tables]) == 'bad', axis=0) > cross_pol_thresh]
cross_pol_strs = [ap for ap in cross_pol_strs if ap not in all_zeros_strs]
# find FEM power issues: must be low power, high slope, and bad or suspect in power, slope, rfi, and shape
fem_off_prod = (bad_powers + .5 * suspect_powers) * (bad_slopes + .5 * suspect_slopes)
fem_off_prod *= (bad_rfi + .5 * suspect_rfi) * (bad_shapes + .5 * suspect_shapes)
fem_off_strs = ap_strs[np.mean(fem_off_prod * (all_powers < median_power) * (all_slopes > median_slope), axis=0) > .1]
# find high power issues
high_power_strs = ap_strs[np.mean(bad_powers & (all_powers > median_power), axis=0) > high_power_thresh]
# find other low power issues
low_power_strs = ap_strs[np.mean(bad_powers & (all_powers < median_power), axis=0) > low_power_thresh]
low_power_strs = [ap for ap in low_power_strs if ap not in all_zeros_strs and ap not in fem_off_strs]
# find low correlation (but not low power)
low_corr_strs = ap_strs[np.mean(np.vstack([t['Low Correlation Class'] for t in tables]) == 'bad', axis=0) > low_corr_thresh]
low_corr_strs = [ap for ap in low_corr_strs if ap not in (set(low_power_strs) | set(all_zeros_strs) | set(fem_off_strs))]
# find bad bandpasses
bad_bandpass_strs = ap_strs[np.mean(bad_shapes, axis=0) > bad_shape_thresh]
bad_bandpass_strs = [ap for ap in bad_bandpass_strs if ap not in (set(low_power_strs) | set(all_zeros_strs) | set(high_power_strs) | set(fem_off_strs))]
# find antennas with excess RFI
excess_rfi_strs = ap_strs[np.mean(np.vstack([t['RFI in Autos Class'] for t in tables]) == 'bad', axis=0) > excess_rfi_thresh]
excess_rfi_strs = [ap for ap in excess_rfi_strs if ap not in (set(low_power_strs) | set(all_zeros_strs) | set(fem_off_strs) |
set(bad_bandpass_strs) | set(high_power_strs))]
# find bad x-engine diffs
xengine_diff_strs = ap_strs[np.mean(np.vstack([t['Bad Diff X-Engines Class'] for t in tables]) == 'bad', axis=0) > xengine_diff_thresh]
xengine_diff_strs = [ap for ap in xengine_diff_strs if ap not in (set(bad_bandpass_strs) | set(low_power_strs) | set(excess_rfi_strs) | set(low_corr_strs) |
set(all_zeros_strs) | set(high_power_strs) | set(fem_off_strs) | set(eo_zeros_strs))]
# find antennas with high redcal chi^2
chisq_strs = ap_strs[np.mean(np.vstack([t['Redcal chi^2 Class'] for t in tables]) == 'bad', axis=0) > chisq_thresh]
chisq_strs = [ap for ap in chisq_strs if ap not in (set(bad_bandpass_strs) | set(low_power_strs) | set(excess_rfi_strs) | set(low_corr_strs) |
set(all_zeros_strs) | set(high_power_strs) | set(fem_off_strs) | set(eo_zeros_strs) | set(xengine_diff_strs))]
if OC_SKIP_OUTRIGGERS:
chisq_strs = [ap for ap in chisq_strs if int(ap[:-1]) < 320]
# collect all results
to_print = [(all_zeros_strs, 'All-Zeros', 'These antennas have visibilities that are more than half zeros.'),
(eo_zeros_strs, 'Excess Zeros in Either Even or Odd Spectra',
'These antennas are showing evidence of packet loss or X-engine failure.', True),
(xengine_diff_strs, 'Excess Power in X-Engine Diffs',
'These antennas are showing evidence of mis-written packets in either the evens or the odds.', True),
(cross_pol_strs, 'Cross-Polarized', 'These antennas have their east and north cables swapped.'),
(fem_off_strs, 'Likely FEM Power Issue', 'These antennas have low power, anomolously high slopes, and extra channels identified as RFI.', True),
(high_power_strs, 'High Power', 'These antennas have high median power.', True),
(low_power_strs, 'Other Low Power Issues', 'These antennas have low power, but are not all-zeros and not FEM off.', True),
(low_corr_strs, 'Low Correlation, But Not Low Power', 'These antennas are low correlation, but their autocorrelation power levels look OK.'),
(bad_bandpass_strs, 'Bad Bandpass Shapes, But Not Bad Power',
'These antennas have unusual bandpass shapes, but are not all-zeros, high power, low power, or FEM off.', True),
(excess_rfi_strs, 'Excess RFI', 'These antennas have excess strucutre (identified as possible RFI) in their bandpassed relative to the ' + \
'median antenna, but not low or high power or a bad bandpass.', True),
(chisq_strs, 'Redcal chi^2', 'These antennas have been idenfied as not redundantly calibrating well, even after passing the above checks.')]
def print_high_level_summary():
for tp in sorted(to_print, key=lambda x: len(x[0]), reverse=True):
print(f'{len(tp[0])} antpols (on {len(set([ap[:-1] for ap in tp[0]]))} antennas) frequently flagged for {tp[1]}.')
def print_all_issue_summaries():
for tp in to_print:
print_issue_summary(*tp)
print_high_level_summary()
52 antpols (on 26 antennas) frequently flagged for All-Zeros. 36 antpols (on 29 antennas) frequently flagged for Likely FEM Power Issue. 18 antpols (on 18 antennas) frequently flagged for Bad Bandpass Shapes, But Not Bad Power. 10 antpols (on 9 antennas) frequently flagged for Excess RFI. 9 antpols (on 7 antennas) frequently flagged for Other Low Power Issues. 7 antpols (on 7 antennas) frequently flagged for Excess Power in X-Engine Diffs. 5 antpols (on 5 antennas) frequently flagged for Low Correlation, But Not Low Power. 0 antpols (on 0 antennas) frequently flagged for Excess Zeros in Either Even or Odd Spectra. 0 antpols (on 0 antennas) frequently flagged for Cross-Polarized. 0 antpols (on 0 antennas) frequently flagged for High Power. 0 antpols (on 0 antennas) frequently flagged for Redcal chi^2.
print_all_issue_summaries()
All Bad Antpols: 63e, 63n, 64e, 64n, 78e, 78n, 156e, 156n, 157e, 157n, 158e, 158n, 175e, 175n, 195e, 195n, 203e, 203n, 212e, 212n, 214e, 214n, 219e, 219n, 231e, 231n, 232e, 232n, 252e, 252n, 253e, 253n, 267e, 267n, 268e, 268n, 282e, 282n, 283e, 283n, 326e, 326n, 327e, 327n, 331e, 331n, 332e, 332n, 336e, 336n, 340e, 340n Node 6: Antpols (6 total): 63e, 63n, 64e, 64n, 78e, 78n Whole Ants (3 total): 64, 78, 63 Single Pols (0 total): Node 12: Antpols (6 total): 156e, 156n, 157e, 157n, 158e, 158n Whole Ants (3 total): 156, 157, 158 Single Pols (0 total): Node 18: Antpols (4 total): 203e, 203n, 219e, 219n Whole Ants (2 total): 203, 219 Single Pols (0 total): Node 21: Antpols (24 total): 175e, 175n, 195e, 195n, 212e, 212n, 214e, 214n, 231e, 231n, 232e, 232n, 326e, 326n, 327e, 327n, 331e, 331n, 332e, 332n, 336e, 336n, 340e, 340n Whole Ants (12 total): 195, 326, 231, 232, 327, 331, 332, 175, 336, 212, 340, 214 Single Pols (0 total): Node 22: Antpols (12 total): 252e, 252n, 253e, 253n, 267e, 267n, 268e, 268n, 282e, 282n, 283e, 283n Whole Ants (6 total): 267, 268, 282, 283, 252, 253 Single Pols (0 total):
All Bad Antpols: 77n, 87e, 93e, 112n, 144n, 151e, 159n Node 6: Antpols (1 total): 77n Whole Ants (0 total): Single Pols (1 total): 77n
Node 8: Antpols (1 total): 87e Whole Ants (0 total): Single Pols (1 total): 87e
Node 10: Antpols (2 total): 93e, 112n Whole Ants (0 total): Single Pols (2 total): 93e, 112n
Node 13: Antpols (1 total): 159n Whole Ants (0 total): Single Pols (1 total): 159n
Node 14: Antpols (1 total): 144n Whole Ants (0 total): Single Pols (1 total): 144n
Node 16: Antpols (1 total): 151e Whole Ants (0 total): Single Pols (1 total): 151e
All Bad Antpols: 16n, 18n, 27e, 28n, 34e, 86n, 92e, 92n, 93n, 94e, 94n, 104n, 106e, 109e, 109n, 111e, 111n, 115e, 125e, 129e, 170e, 174e, 174n, 176e, 182n, 183n, 184e, 194e, 194n, 200e, 211n, 216n, 246n, 328n, 329e, 329n Node 1: Antpols (4 total): 16n, 18n, 27e, 28n Whole Ants (0 total): Single Pols (4 total): 16n, 18n, 27e, 28n
Node 6: Antpols (1 total): 34e Whole Ants (0 total): Single Pols (1 total): 34e