by Josh Dillon last updated October 17, 2022
This notebook parses and summarizes the output of the file_calibration
notebook to produce a report on per-antenna malfunctions on a daily basis.
Quick links:
import numpy as np
import pandas as pd
import glob
import os
import matplotlib.pyplot as plt
from hera_cal import io, utils
from hera_qm import ant_class
from uvtools.plot import plot_antpos, plot_antclass
%matplotlib inline
from IPython.display import display, HTML
# Parse settings from environment
ANT_CLASS_FOLDER = os.environ.get("ANT_CLASS_FOLDER", "./")
SUM_FILE = os.environ.get("SUM_FILE", None)
OC_SKIP_OUTRIGGERS = os.environ.get("OC_SKIP_OUTRIGGERS", "TRUE").upper() == "TRUE"
for param in ['ANT_CLASS_FOLDER', 'SUM_FILE', 'OC_SKIP_OUTRIGGERS']:
print(f"{param} = '{eval(param)}'")
ANT_CLASS_FOLDER = '/mnt/sn1/2460100' SUM_FILE = '/mnt/sn1/2460100/zen.2460100.44175.sum.uvh5' OC_SKIP_OUTRIGGERS = 'True'
if SUM_FILE is not None:
from astropy.time import Time, TimeDelta
utc = Time(float(SUM_FILE.split('zen.')[-1].split('.sum.uvh5')[0]), format='jd').datetime
print(f'Date: {utc.month}-{utc.day}-{utc.year}')
Date: 6-4-2023
# set thresholds for fraction of the day
overall_thresh = .1
all_zero_thresh = .1
eo_zeros_thresh = .1
cross_pol_thresh = .5
bad_fem_thresh = .1
high_power_thresh = .1
low_power_thresh = .1
low_corr_thresh = .1
bad_shape_thresh = .5
excess_rfi_thresh = .1
chisq_thresh = .25
# Load csvs
csv_files = sorted(glob.glob(os.path.join(ANT_CLASS_FOLDER, '*.ant_class.csv')))
jds = [float(f.split('/')[-1].split('zen.')[-1].split('.sum')[0]) for f in csv_files]
tables = [pd.read_csv(f).dropna(axis=0, how='all') for f in csv_files]
table_cols = tables[0].columns[1::2]
class_cols = tables[0].columns[2::2]
print(f'Found {len(csv_files)} csv files starting with {csv_files[0]}')
Found 1655 csv files starting with /mnt/sn1/2460100/zen.2460100.21285.sum.ant_class.csv
# parse ant_strings
ap_strs = np.array(tables[0]['Antenna'])
ants = sorted(set(int(a[:-1]) for a in ap_strs))
translator = ''.maketrans('e', 'n') | ''.maketrans('n', 'e')
# get node numbers
node_dict = {ant: 'Unknown' for ant in ants}
try:
from hera_mc import cm_hookup
hookup = cm_hookup.get_hookup('default')
for ant_name in hookup:
ant = int("".join(filter(str.isdigit, ant_name)))
if ant in node_dict:
if hookup[ant_name].get_part_from_type('node')['E<ground'] is not None:
node_dict[ant] = int(hookup[ant_name].get_part_from_type('node')['E<ground'][1:])
except:
pass
nodes = sorted(set(node_dict.values()))
def classification_array(col):
class_array = np.vstack([t[col] for t in tables])
class_array[class_array == 'good'] = 1.7
class_array[class_array == 'suspect'] = 1
class_array[class_array == 'bad'] = 0
return class_array.astype(float)
if SUM_FILE is not None:
hd = io.HERADataFastReader(SUM_FILE)
ap_tuples = [(int(ap[:-1]), {'e': 'Jee', 'n': 'Jnn'}[ap[-1]]) for ap in ap_strs]
bad_bools = np.mean(classification_array('Antenna Class') == 0, axis=0) > overall_thresh
bad_aps = [ap_tuples[i] for i in np.arange(len(ap_tuples))[bad_bools]]
suspect_bools = np.mean(classification_array('Antenna Class') == 1, axis=0) > overall_thresh
suspect_aps = [ap_tuples[i] for i in np.arange(len(ap_tuples))[suspect_bools] if ap_tuples[i] not in bad_aps]
good_aps = [ap for ap in ap_tuples if ap not in bad_aps and ap not in suspect_aps]
overall_class = ant_class.AntennaClassification(bad=bad_aps, suspect=suspect_aps, good=good_aps)
autos, _, _ = hd.read(bls=[bl for bl in hd.bls if utils.split_bl(bl)[0] == utils.split_bl(bl)[1]], read_flags=False, read_nsamples=False)
avg_unflagged_auto = {pol: np.mean([autos[bl] for bl in autos if bl[2] == pol and overall_class[utils.split_bl(bl)[0]] != 'bad'], axis=(0, 1)) for pol in ['ee', 'nn']}
--------------------------------------------------------------------------- ValueError Traceback (most recent call last) Cell In [9], line 4 2 hd = io.HERADataFastReader(SUM_FILE) 3 ap_tuples = [(int(ap[:-1]), {'e': 'Jee', 'n': 'Jnn'}[ap[-1]]) for ap in ap_strs] ----> 4 bad_bools = np.mean(classification_array('Antenna Class') == 0, axis=0) > overall_thresh 5 bad_aps = [ap_tuples[i] for i in np.arange(len(ap_tuples))[bad_bools]] 6 suspect_bools = np.mean(classification_array('Antenna Class') == 1, axis=0) > overall_thresh Cell In [8], line 2, in classification_array(col) 1 def classification_array(col): ----> 2 class_array = np.vstack([t[col] for t in tables]) 3 class_array[class_array == 'good'] = 1.7 4 class_array[class_array == 'suspect'] = 1 File <__array_function__ internals>:200, in vstack(*args, **kwargs) File ~/mambaforge/envs/RTP/lib/python3.10/site-packages/numpy/core/shape_base.py:296, in vstack(tup, dtype, casting) 294 if not isinstance(arrs, list): 295 arrs = [arrs] --> 296 return _nx.concatenate(arrs, 0, dtype=dtype, casting=casting) File <__array_function__ internals>:200, in concatenate(*args, **kwargs) ValueError: all the input array dimensions except for the concatenation axis must match exactly, but along dimension 1, the array at index 0 has size 396 and the array at index 53 has size 402
def print_issue_summary(bad_ant_strs, title, notes='', plot=False):
'''Print report for list of bad antenna polarizations strings'''
unique_bad_antnums = [int(ap[:-1]) for ap in bad_ant_strs]
display(HTML(f'<h2>{title}: ({len(bad_ant_strs)} antpols across {len(set([ba[:-1] for ba in bad_ant_strs]))} antennas)</h2>'))
if len(notes) > 0:
display(HTML(f'<h4>{notes}</h4>'))
if len(bad_ant_strs) > 0:
print(f'All Bad Antpols: {", ".join(bad_ant_strs)}\n')
for node in nodes:
if np.any([node == node_dict[a] for a in unique_bad_antnums]):
aps = [ap for ap in bad_ant_strs if node_dict[int(ap[:-1])] == node]
whole_ants = [str(wa) for wa in set([int(ap[:-1]) for ap in aps if ap.translate(translator) in bad_ant_strs])]
single_pols = [ap for ap in aps if ap.translate(translator) not in bad_ant_strs]
print(f'Node {node}:')
print(f'\tAntpols ({len(aps)} total): {", ".join(aps)}')
print(f'\tWhole Ants ({len(whole_ants)} total): {", ".join(whole_ants)}')
print(f'\tSingle Pols ({len(single_pols)} total): {", ".join(single_pols)}')
if plot and SUM_FILE is not None:
fig, axes = plt.subplots(1, 2, figsize=(12,4), dpi=70, sharey=True, gridspec_kw={'wspace': 0})
for ax, pol in zip(axes, ['ee', 'nn']):
ax.semilogy(autos.freqs / 1e6, avg_unflagged_auto[pol], 'k--', label='Average\nUnflagged\nAuto')
for ap in aps:
ant = int(ap[:-1]), utils.comply_pol(ap[-1])
auto_bl = utils.join_bl(ant, ant)
if auto_bl[2] == pol:
ax.semilogy(autos.freqs / 1e6, np.mean(autos[auto_bl], axis=0), label=ap)
ax.legend()
ax.set_xlim([40, 299])
ax.set_title(f'{title} on Node {node} ({pol}-antennas)')
ax.set_xlabel('Frequency (MHz)')
axes[0].set_ylabel('Single File Raw Autocorrelation')
plt.tight_layout()
plt.show()
# precompute various helpful quantities
all_slopes = np.vstack([t['Autocorr Slope'] for t in tables])
median_slope = np.median(all_slopes)
bad_slopes = np.vstack([t['Autocorr Slope Class'] for t in tables]) == 'bad'
suspect_slopes = np.vstack([t['Autocorr Slope Class'] for t in tables]) == 'suspect'
bad_shapes = np.vstack([t['Autocorr Shape Class'] for t in tables]) == 'bad'
suspect_shapes = np.vstack([t['Autocorr Shape Class'] for t in tables]) == 'suspect'
all_powers = np.vstack([t['Autocorr Power'] for t in tables])
median_power = np.median(all_powers)
bad_powers = np.vstack([t['Autocorr Power Class'] for t in tables]) == 'bad'
suspect_powers = np.vstack([t['Autocorr Power Class'] for t in tables]) == 'suspect'
bad_rfi = np.vstack([t['RFI in Autos Class'] for t in tables]) == 'bad'
suspect_rfi = np.vstack([t['RFI in Autos Class'] for t in tables]) == 'suspect'
--------------------------------------------------------------------------- ValueError Traceback (most recent call last) Cell In [11], line 2 1 # precompute various helpful quantities ----> 2 all_slopes = np.vstack([t['Autocorr Slope'] for t in tables]) 3 median_slope = np.median(all_slopes) 4 bad_slopes = np.vstack([t['Autocorr Slope Class'] for t in tables]) == 'bad' File <__array_function__ internals>:200, in vstack(*args, **kwargs) File ~/mambaforge/envs/RTP/lib/python3.10/site-packages/numpy/core/shape_base.py:296, in vstack(tup, dtype, casting) 294 if not isinstance(arrs, list): 295 arrs = [arrs] --> 296 return _nx.concatenate(arrs, 0, dtype=dtype, casting=casting) File <__array_function__ internals>:200, in concatenate(*args, **kwargs) ValueError: all the input array dimensions except for the concatenation axis must match exactly, but along dimension 1, the array at index 0 has size 396 and the array at index 53 has size 402
# find all zeros
all_zeros_strs = ap_strs[np.mean(np.vstack([t['Dead? Class'] for t in tables]) == 'bad', axis=0) > all_zero_thresh]
--------------------------------------------------------------------------- ValueError Traceback (most recent call last) Cell In [12], line 2 1 # find all zeros ----> 2 all_zeros_strs = ap_strs[np.mean(np.vstack([t['Dead? Class'] for t in tables]) == 'bad', axis=0) > all_zero_thresh] File <__array_function__ internals>:200, in vstack(*args, **kwargs) File ~/mambaforge/envs/RTP/lib/python3.10/site-packages/numpy/core/shape_base.py:296, in vstack(tup, dtype, casting) 294 if not isinstance(arrs, list): 295 arrs = [arrs] --> 296 return _nx.concatenate(arrs, 0, dtype=dtype, casting=casting) File <__array_function__ internals>:200, in concatenate(*args, **kwargs) ValueError: all the input array dimensions except for the concatenation axis must match exactly, but along dimension 1, the array at index 0 has size 396 and the array at index 53 has size 402
# find even/odd zeros
eo_zeros_strs = ap_strs[np.mean(np.vstack([t['Even/Odd Zeros Class'] for t in tables]) == 'bad', axis=0) > eo_zeros_thresh]
eo_zeros_strs = [ap for ap in eo_zeros_strs if ap not in all_zeros_strs]
--------------------------------------------------------------------------- ValueError Traceback (most recent call last) Cell In [13], line 2 1 # find even/odd zeros ----> 2 eo_zeros_strs = ap_strs[np.mean(np.vstack([t['Even/Odd Zeros Class'] for t in tables]) == 'bad', axis=0) > eo_zeros_thresh] 3 eo_zeros_strs = [ap for ap in eo_zeros_strs if ap not in all_zeros_strs] File <__array_function__ internals>:200, in vstack(*args, **kwargs) File ~/mambaforge/envs/RTP/lib/python3.10/site-packages/numpy/core/shape_base.py:296, in vstack(tup, dtype, casting) 294 if not isinstance(arrs, list): 295 arrs = [arrs] --> 296 return _nx.concatenate(arrs, 0, dtype=dtype, casting=casting) File <__array_function__ internals>:200, in concatenate(*args, **kwargs) ValueError: all the input array dimensions except for the concatenation axis must match exactly, but along dimension 1, the array at index 0 has size 396 and the array at index 53 has size 402
# find cross-polarized antennas
cross_pol_strs = ap_strs[np.mean(np.vstack([t['Cross-Polarized Class'] for t in tables]) == 'bad', axis=0) > cross_pol_thresh]
--------------------------------------------------------------------------- ValueError Traceback (most recent call last) Cell In [14], line 2 1 # find cross-polarized antennas ----> 2 cross_pol_strs = ap_strs[np.mean(np.vstack([t['Cross-Polarized Class'] for t in tables]) == 'bad', axis=0) > cross_pol_thresh] File <__array_function__ internals>:200, in vstack(*args, **kwargs) File ~/mambaforge/envs/RTP/lib/python3.10/site-packages/numpy/core/shape_base.py:296, in vstack(tup, dtype, casting) 294 if not isinstance(arrs, list): 295 arrs = [arrs] --> 296 return _nx.concatenate(arrs, 0, dtype=dtype, casting=casting) File <__array_function__ internals>:200, in concatenate(*args, **kwargs) ValueError: all the input array dimensions except for the concatenation axis must match exactly, but along dimension 1, the array at index 0 has size 396 and the array at index 53 has size 402
# find FEM power issues: must be low power, high slope, and bad or suspect in power, slope, rfi, and shape
fem_off_prod = (bad_powers + .5 * suspect_powers) * (bad_slopes + .5 * suspect_slopes)
fem_off_prod *= (bad_rfi + .5 * suspect_rfi) * (bad_shapes + .5 * suspect_shapes)
fem_off_strs = ap_strs[np.mean(fem_off_prod * (all_powers < median_power) * (all_slopes > median_slope), axis=0) > .1]
--------------------------------------------------------------------------- NameError Traceback (most recent call last) Cell In [15], line 2 1 # find FEM power issues: must be low power, high slope, and bad or suspect in power, slope, rfi, and shape ----> 2 fem_off_prod = (bad_powers + .5 * suspect_powers) * (bad_slopes + .5 * suspect_slopes) 3 fem_off_prod *= (bad_rfi + .5 * suspect_rfi) * (bad_shapes + .5 * suspect_shapes) 4 fem_off_strs = ap_strs[np.mean(fem_off_prod * (all_powers < median_power) * (all_slopes > median_slope), axis=0) > .1] NameError: name 'bad_powers' is not defined
# find high power issues
high_power_strs = ap_strs[np.mean(bad_powers & (all_powers > median_power), axis=0) > high_power_thresh]
--------------------------------------------------------------------------- NameError Traceback (most recent call last) Cell In [16], line 2 1 # find high power issues ----> 2 high_power_strs = ap_strs[np.mean(bad_powers & (all_powers > median_power), axis=0) > high_power_thresh] NameError: name 'bad_powers' is not defined
# find other low power issues
low_power_strs = ap_strs[np.mean(bad_powers & (all_powers < median_power), axis=0) > low_power_thresh]
low_power_strs = [ap for ap in low_power_strs if ap not in all_zeros_strs and ap not in fem_off_strs]
--------------------------------------------------------------------------- NameError Traceback (most recent call last) Cell In [17], line 2 1 # find other low power issues ----> 2 low_power_strs = ap_strs[np.mean(bad_powers & (all_powers < median_power), axis=0) > low_power_thresh] 3 low_power_strs = [ap for ap in low_power_strs if ap not in all_zeros_strs and ap not in fem_off_strs] NameError: name 'bad_powers' is not defined
# find low correlation (but not low power)
low_corr_strs = ap_strs[np.mean(np.vstack([t['Low Correlation Class'] for t in tables]) == 'bad', axis=0) > low_corr_thresh]
low_corr_strs = [ap for ap in low_corr_strs if ap not in (set(low_power_strs) | set(all_zeros_strs) | set(fem_off_strs))]
--------------------------------------------------------------------------- ValueError Traceback (most recent call last) Cell In [18], line 2 1 # find low correlation (but not low power) ----> 2 low_corr_strs = ap_strs[np.mean(np.vstack([t['Low Correlation Class'] for t in tables]) == 'bad', axis=0) > low_corr_thresh] 3 low_corr_strs = [ap for ap in low_corr_strs if ap not in (set(low_power_strs) | set(all_zeros_strs) | set(fem_off_strs))] File <__array_function__ internals>:200, in vstack(*args, **kwargs) File ~/mambaforge/envs/RTP/lib/python3.10/site-packages/numpy/core/shape_base.py:296, in vstack(tup, dtype, casting) 294 if not isinstance(arrs, list): 295 arrs = [arrs] --> 296 return _nx.concatenate(arrs, 0, dtype=dtype, casting=casting) File <__array_function__ internals>:200, in concatenate(*args, **kwargs) ValueError: all the input array dimensions except for the concatenation axis must match exactly, but along dimension 1, the array at index 0 has size 396 and the array at index 53 has size 402
# find bad bandpasses
bad_bandpass_strs = ap_strs[np.mean(bad_shapes, axis=0) > bad_shape_thresh]
bad_bandpass_strs = [ap for ap in bad_bandpass_strs if ap not in (set(low_power_strs) | set(all_zeros_strs) | set(high_power_strs) | set(fem_off_strs))]
--------------------------------------------------------------------------- NameError Traceback (most recent call last) Cell In [19], line 2 1 # find bad bandpasses ----> 2 bad_bandpass_strs = ap_strs[np.mean(bad_shapes, axis=0) > bad_shape_thresh] 3 bad_bandpass_strs = [ap for ap in bad_bandpass_strs if ap not in (set(low_power_strs) | set(all_zeros_strs) | set(high_power_strs) | set(fem_off_strs))] NameError: name 'bad_shapes' is not defined
# find antennas with excess RFI
excess_rfi_strs = ap_strs[np.mean(np.vstack([t['RFI in Autos Class'] for t in tables]) == 'bad', axis=0) > excess_rfi_thresh]
excess_rfi_strs = [ap for ap in excess_rfi_strs if ap not in (set(low_power_strs) | set(all_zeros_strs) | set(fem_off_strs) |
set(bad_bandpass_strs) | set(high_power_strs))]
--------------------------------------------------------------------------- ValueError Traceback (most recent call last) Cell In [20], line 2 1 # find antennas with excess RFI ----> 2 excess_rfi_strs = ap_strs[np.mean(np.vstack([t['RFI in Autos Class'] for t in tables]) == 'bad', axis=0) > excess_rfi_thresh] 3 excess_rfi_strs = [ap for ap in excess_rfi_strs if ap not in (set(low_power_strs) | set(all_zeros_strs) | set(fem_off_strs) | 4 set(bad_bandpass_strs) | set(high_power_strs))] File <__array_function__ internals>:200, in vstack(*args, **kwargs) File ~/mambaforge/envs/RTP/lib/python3.10/site-packages/numpy/core/shape_base.py:296, in vstack(tup, dtype, casting) 294 if not isinstance(arrs, list): 295 arrs = [arrs] --> 296 return _nx.concatenate(arrs, 0, dtype=dtype, casting=casting) File <__array_function__ internals>:200, in concatenate(*args, **kwargs) ValueError: all the input array dimensions except for the concatenation axis must match exactly, but along dimension 1, the array at index 0 has size 396 and the array at index 53 has size 402
# find antennas with high redcal chi^2
chisq_strs = ap_strs[np.mean(np.vstack([t['Redcal chi^2 Class'] for t in tables]) == 'bad', axis=0) > chisq_thresh]
chisq_strs = [ap for ap in chisq_strs if ap not in (set(bad_bandpass_strs) | set(low_power_strs) | set(excess_rfi_strs) | set(low_corr_strs) |
set(all_zeros_strs) | set(high_power_strs) | set(fem_off_strs) | set(eo_zeros_strs))]
if OC_SKIP_OUTRIGGERS:
chisq_strs = [ap for ap in chisq_strs if int(ap[:-1]) < 320]
--------------------------------------------------------------------------- ValueError Traceback (most recent call last) Cell In [21], line 2 1 # find antennas with high redcal chi^2 ----> 2 chisq_strs = ap_strs[np.mean(np.vstack([t['Redcal chi^2 Class'] for t in tables]) == 'bad', axis=0) > chisq_thresh] 3 chisq_strs = [ap for ap in chisq_strs if ap not in (set(bad_bandpass_strs) | set(low_power_strs) | set(excess_rfi_strs) | set(low_corr_strs) | 4 set(all_zeros_strs) | set(high_power_strs) | set(fem_off_strs) | set(eo_zeros_strs))] 5 if OC_SKIP_OUTRIGGERS: File <__array_function__ internals>:200, in vstack(*args, **kwargs) File ~/mambaforge/envs/RTP/lib/python3.10/site-packages/numpy/core/shape_base.py:296, in vstack(tup, dtype, casting) 294 if not isinstance(arrs, list): 295 arrs = [arrs] --> 296 return _nx.concatenate(arrs, 0, dtype=dtype, casting=casting) File <__array_function__ internals>:200, in concatenate(*args, **kwargs) ValueError: all the input array dimensions except for the concatenation axis must match exactly, but along dimension 1, the array at index 0 has size 396 and the array at index 53 has size 402
# collect all results
to_print = [(all_zeros_strs, 'All-Zeros', 'These antennas have visibilities that are more than half zeros.'),
(eo_zeros_strs, 'Excess Zeros in Either Even or Odd Spectra',
'These antennas are showing evidence of packet loss or X-engine failure.', True),
(cross_pol_strs, 'Cross-Polarized', 'These antennas have their east and north cables swapped.'),
(fem_off_strs, 'Likely FEM Power Issue', 'These antennas have low power, anomolously high slopes, and extra channels identified as RFI.', True),
(high_power_strs, 'High Power', 'These antennas have high median power.', True),
(low_power_strs, 'Other Low Power Issues', 'These antennas have low power, but are not all-zeros and not FEM off.', True),
(low_corr_strs, 'Low Correlation, But Not Low Power', 'These antennas are low correlation, but their autocorrelation power levels look OK.'),
(bad_bandpass_strs, 'Bad Bandpass Shapes, But Not Bad Power',
'These antennas have unusual bandpass shapes, but are not all-zeros, high power, low power, or FEM off.', True),
(excess_rfi_strs, 'Excess RFI', 'These antennas have excess strucutre (identified as possible RFI) in their bandpassed relative to the ' + \
'median antenna, but not low or high power or a bad bandpass.', True),
(chisq_strs, 'Redcal chi^2', 'These antennas have been idenfied as not redundantly calibrating well, even after passing the above checks.')]
--------------------------------------------------------------------------- NameError Traceback (most recent call last) Cell In [22], line 2 1 # collect all results ----> 2 to_print = [(all_zeros_strs, 'All-Zeros', 'These antennas have visibilities that are more than half zeros.'), 3 (eo_zeros_strs, 'Excess Zeros in Either Even or Odd Spectra', 4 'These antennas are showing evidence of packet loss or X-engine failure.', True), 5 (cross_pol_strs, 'Cross-Polarized', 'These antennas have their east and north cables swapped.'), 6 (fem_off_strs, 'Likely FEM Power Issue', 'These antennas have low power, anomolously high slopes, and extra channels identified as RFI.', True), 7 (high_power_strs, 'High Power', 'These antennas have high median power.', True), 8 (low_power_strs, 'Other Low Power Issues', 'These antennas have low power, but are not all-zeros and not FEM off.', True), 9 (low_corr_strs, 'Low Correlation, But Not Low Power', 'These antennas are low correlation, but their autocorrelation power levels look OK.'), 10 (bad_bandpass_strs, 'Bad Bandpass Shapes, But Not Bad Power', 11 'These antennas have unusual bandpass shapes, but are not all-zeros, high power, low power, or FEM off.', True), 12 (excess_rfi_strs, 'Excess RFI', 'These antennas have excess strucutre (identified as possible RFI) in their bandpassed relative to the ' + \ 13 'median antenna, but not low or high power or a bad bandpass.', True), 14 (chisq_strs, 'Redcal chi^2', 'These antennas have been idenfied as not redundantly calibrating well, even after passing the above checks.')] NameError: name 'all_zeros_strs' is not defined
def print_high_level_summary():
for tp in sorted(to_print, key=lambda x: len(x[0]), reverse=True):
print(f'{len(tp[0])} antpols (on {len(set([ap[:-1] for ap in tp[0]]))} antennas) frequently flagged for {tp[1]}.')
def print_all_issue_summaries():
for tp in to_print:
print_issue_summary(*tp)
print_high_level_summary()
--------------------------------------------------------------------------- NameError Traceback (most recent call last) Cell In [24], line 1 ----> 1 print_high_level_summary() Cell In [23], line 2, in print_high_level_summary() 1 def print_high_level_summary(): ----> 2 for tp in sorted(to_print, key=lambda x: len(x[0]), reverse=True): 3 print(f'{len(tp[0])} antpols (on {len(set([ap[:-1] for ap in tp[0]]))} antennas) frequently flagged for {tp[1]}.') NameError: name 'to_print' is not defined
print_all_issue_summaries()
--------------------------------------------------------------------------- NameError Traceback (most recent call last) Cell In [25], line 1 ----> 1 print_all_issue_summaries() Cell In [23], line 6, in print_all_issue_summaries() 5 def print_all_issue_summaries(): ----> 6 for tp in to_print: 7 print_issue_summary(*tp) NameError: name 'to_print' is not defined
def classification_plot(col):
class_array = classification_array(col)
plt.figure(figsize=(12, len(ants) / 10), dpi=100)
plt.imshow(class_array.T, aspect='auto', interpolation='none', cmap='RdYlGn', vmin=0, vmax=2,
extent=[jds[0] - np.floor(jds[0]), jds[-1] - np.floor(jds[0]), len(ants), 0])
plt.xlabel(f'JD - {int(jds[0])}')
plt.yticks(ticks=np.arange(.5, len(ants)+.5), labels=[ant for ant in ants], fontsize=6)
plt.ylabel('Antenna Number (East First, Then North)')
plt.gca().tick_params(right=True, top=True, labelright=True, labeltop=True)
plt.tight_layout()
plt.title(f'{col}: Green is "good", Yellow is "suspect", Red is "bad"')
This "big green board" shows the overall (i.e. after redundant calibration) classification of antennas on a per-file basis. This is useful for looking at time-dependent effects across the array. While only antenna numbers are labeled, both polarizations are shown, first East then North going down, above and below the antenna's tick mark.
classification_plot('Antenna Class')
--------------------------------------------------------------------------- ValueError Traceback (most recent call last) Cell In [27], line 1 ----> 1 classification_plot('Antenna Class') Cell In [26], line 2, in classification_plot(col) 1 def classification_plot(col): ----> 2 class_array = classification_array(col) 3 plt.figure(figsize=(12, len(ants) / 10), dpi=100) 4 plt.imshow(class_array.T, aspect='auto', interpolation='none', cmap='RdYlGn', vmin=0, vmax=2, 5 extent=[jds[0] - np.floor(jds[0]), jds[-1] - np.floor(jds[0]), len(ants), 0]) Cell In [8], line 2, in classification_array(col) 1 def classification_array(col): ----> 2 class_array = np.vstack([t[col] for t in tables]) 3 class_array[class_array == 'good'] = 1.7 4 class_array[class_array == 'suspect'] = 1 File <__array_function__ internals>:200, in vstack(*args, **kwargs) File ~/mambaforge/envs/RTP/lib/python3.10/site-packages/numpy/core/shape_base.py:296, in vstack(tup, dtype, casting) 294 if not isinstance(arrs, list): 295 arrs = [arrs] --> 296 return _nx.concatenate(arrs, 0, dtype=dtype, casting=casting) File <__array_function__ internals>:200, in concatenate(*args, **kwargs) ValueError: all the input array dimensions except for the concatenation axis must match exactly, but along dimension 1, the array at index 0 has size 396 and the array at index 53 has size 402
# compute flag fractions for all classifiers and antennas
frac_flagged = []
for col in class_cols[1:]:
class_array = np.vstack([t[col] for t in tables])
class_array[class_array == 'good'] = False
class_array[class_array == 'suspect'] = False
class_array[class_array == 'bad'] = True
frac_flagged.append(np.sum(class_array, axis=0))
--------------------------------------------------------------------------- ValueError Traceback (most recent call last) Cell In [28], line 4 2 frac_flagged = [] 3 for col in class_cols[1:]: ----> 4 class_array = np.vstack([t[col] for t in tables]) 5 class_array[class_array == 'good'] = False 6 class_array[class_array == 'suspect'] = False File <__array_function__ internals>:200, in vstack(*args, **kwargs) File ~/mambaforge/envs/RTP/lib/python3.10/site-packages/numpy/core/shape_base.py:296, in vstack(tup, dtype, casting) 294 if not isinstance(arrs, list): 295 arrs = [arrs] --> 296 return _nx.concatenate(arrs, 0, dtype=dtype, casting=casting) File <__array_function__ internals>:200, in concatenate(*args, **kwargs) ValueError: all the input array dimensions except for the concatenation axis must match exactly, but along dimension 1, the array at index 0 has size 396 and the array at index 53 has size 402
def plot_flag_frac_all_classifiers():
ticks = []
for i, col in enumerate(list(class_cols[1:])):
ticks.append(f'{col} ({np.nanmean(np.array(frac_flagged).astype(float)[i]) / len(csv_files):.2%})')
plt.figure(figsize=(8, len(ants) / 10), dpi=100)
plt.imshow(np.array(frac_flagged).astype(float).T, aspect='auto', interpolation='none', cmap='viridis')
plt.xticks(ticks=np.arange(len(list(class_cols[1:]))), labels=ticks, rotation=-45, ha='left')
plt.yticks(ticks=np.arange(.5, len(ap_strs)+.5, 2), labels=[ant for ant in ants], fontsize=6)
plt.ylabel('Antenna Number (East First, Then North)')
plt.gca().tick_params(right=True, labelright=True,)
ax2 = plt.gca().twiny()
ax2.set_xticks(ticks=np.arange(len(list(class_cols[1:]))), labels=ticks, rotation=45, ha='left')
plt.colorbar(ax=plt.gca(), label=f'Number of Files Flagged Out of {len(csv_files)}', aspect=50)
plt.tight_layout()
This plot shows the fraction of files flagged for each reason for each antenna. It's useful for seeing which problems are transitory and which ones are more common. Note that not all flags are independent and in particular redcal chi^2 takes an OR of other classifications as an input. Also note that only antenna numbers are labeled, both polarizations are shown, first East then North going down, above and below the antenna's tick mark.
plot_flag_frac_all_classifiers()
--------------------------------------------------------------------------- IndexError Traceback (most recent call last) Cell In [30], line 1 ----> 1 plot_flag_frac_all_classifiers() Cell In [29], line 4, in plot_flag_frac_all_classifiers() 2 ticks = [] 3 for i, col in enumerate(list(class_cols[1:])): ----> 4 ticks.append(f'{col} ({np.nanmean(np.array(frac_flagged).astype(float)[i]) / len(csv_files):.2%})') 5 plt.figure(figsize=(8, len(ants) / 10), dpi=100) 6 plt.imshow(np.array(frac_flagged).astype(float).T, aspect='auto', interpolation='none', cmap='viridis') IndexError: index 0 is out of bounds for axis 0 with size 0
def array_class_plot():
fig, axes = plt.subplots(1, 2, figsize=(14, 6), dpi=100, gridspec_kw={'width_ratios': [2, 1]})
plot_antclass(hd.antpos, overall_class, ax=axes[0], ants=[ant for ant in hd.data_ants if ant < 320], legend=False,
title=f'HERA Core: Overall Flagging Based on {overall_thresh:.1%} Daily Threshold')
plot_antclass(hd.antpos, overall_class, ax=axes[1], ants=[ant for ant in hd.data_ants if ant >= 320], radius=50, title='Outriggers')
Overall classification of antenna-polarizations shown on the array layout. If any antenna is marked bad for any reason more than the threshold (default 10%), it is marked bad here. Likewise, if any antenna is marked suspect for more than 10% of the night (but not bad), it's suspect here.
if SUM_FILE is not None: array_class_plot()
--------------------------------------------------------------------------- NameError Traceback (most recent call last) Cell In [32], line 1 ----> 1 if SUM_FILE is not None: array_class_plot() Cell In [31], line 3, in array_class_plot() 1 def array_class_plot(): 2 fig, axes = plt.subplots(1, 2, figsize=(14, 6), dpi=100, gridspec_kw={'width_ratios': [2, 1]}) ----> 3 plot_antclass(hd.antpos, overall_class, ax=axes[0], ants=[ant for ant in hd.data_ants if ant < 320], legend=False, 4 title=f'HERA Core: Overall Flagging Based on {overall_thresh:.1%} Daily Threshold') 5 plot_antclass(hd.antpos, overall_class, ax=axes[1], ants=[ant for ant in hd.data_ants if ant >= 320], radius=50, title='Outriggers') NameError: name 'overall_class' is not defined