auto_metrics
Nightly Notebook¶By Josh Dillon
Last Updated January 27, 2021
auto_metrics
is a module in hera_qm
that computes a series of statistics on night-long autocorrelation waterfalls to find antenna outliers in shape, power, or temporal structure. In general, these are assessed by collapsing each waterfall to a single (normalized) spectrum, then comparing that spectrum to the mean/median of the unflagged antennas' spectra to compute some difference metric. Those are then converted into modified Z-scores by comparing the overall distribution of good antennas and the worst antenna is flagged if it exceeds some threshold. This whole processes is repeated iteratively until no new bad antennas are identified. This proceeds in two rounds, first with more robust median-based statistics to identify the worst outliers, and then (after an RFI flagging step), a round with mean-based statistics. This notebook examines those mean-based spectra and statistics.
import numpy as np
import pandas as pd
pd.set_option('display.max_rows', 1000)
import matplotlib.pyplot as plt
import matplotlib
import glob
import os
import operator
from hera_cal.io import HERAData
from hera_cal.utils import split_pol
from hera_qm.metrics_io import load_metric_file
from hera_notebook_templates.utils import status_colors
from IPython.display import display, HTML
%matplotlib inline
%config InlineBackend.figure_format = 'retina'
display(HTML("<style>.container { width:100% !important; }</style>"))
# If you want to run this notebook locally, copy the output of the next cell into the first few lines of this cell.
# JD = '2459122'
# data_path = '/lustre/aoc/projects/hera/H4C/2459122'
# os.environ["JULIANDATE"] = JD
# os.environ["DATA_PATH"] = data_path
# Use environment variables to figure out path to data
JD = os.environ['JULIANDATE']
data_path = os.environ['DATA_PATH']
print(f'JD = "{JD}"')
print(f'data_path = "{data_path}"')
JD = "2460100" data_path = "/mnt/sn1/2460100"
from astropy.time import Time
utc = Time(JD, format='jd').datetime
print(f'Date: {utc.month}-{utc.day}-{utc.year}')
Date: 6-4-2023
print(f'Looking for data in {data_path} on JD {JD}...')
auto_metrics_file = sorted(glob.glob(os.path.join(data_path, f'zen.{JD}*.auto_metrics.h5')))
if len(auto_metrics_file) > 0:
auto_metrics_file = auto_metrics_file[0]
print(f'Found auto_metrics results file at {auto_metrics_file}.')
else:
raise OSError(f'{auto_metrics_file} not found.')
raw_auto_files = sorted(glob.glob(os.path.join(data_path, f'zen.{JD}.?????.sum.autos.uvh5')))
if len(raw_auto_files) > 0:
print(f'Found {len(raw_auto_files)} extracted autocorrelation files.')
else:
raise OSError(f'No files of the form zen.{JD}.?????.sum.autos.uvh5 found in {data_path}.')
Looking for data in /mnt/sn1/2460100 on JD 2460100... Found auto_metrics results file at /mnt/sn1/2460100/zen.2460100.21285.sum.auto_metrics.h5. Found 1655 extracted autocorrelation files.
# load auto_metrics and define use someful quantities
am = load_metric_file(auto_metrics_file)
mean_round_modz_cut = am['parameters']['mean_round_modz_cut']
ex_ants = am['ex_ants']['r2_ex_ants']
ants = sorted(set(bl[0] for bl in am['modzs']['r2_shape_modzs']))
# load raw autocorrelation waterfalls and define some useful quantities
hd = HERAData(raw_auto_files)
autos, _, _ = hd.read(axis='blt')
wf_shape = next(iter(autos.values())).shape
freqs = autos.freqs / 1e6
times = autos.times
lsts = autos.lsts * 12 / np.pi
--------------------------------------------------------------------------- NotImplementedError Traceback (most recent call last) Cell In [6], line 9 7 # load raw autocorrelation waterfalls and define some useful quantities 8 hd = HERAData(raw_auto_files) ----> 9 autos, _, _ = hd.read(axis='blt') 10 wf_shape = next(iter(autos.values())).shape 11 freqs = autos.freqs / 1e6 File ~/mambaforge/envs/RTP/lib/python3.10/site-packages/hera_cal/io.py:810, in HERAData.read(self, bls, polarizations, times, time_range, lsts, lst_range, frequencies, freq_chans, axis, read_data, return_data, run_check, check_extra, run_check_acceptability, **kwargs) 808 # process data into DataContainers 809 if read_data or self.filetype in ['uvh5', 'uvfits']: --> 810 self._determine_blt_slicing() 811 self._determine_pol_indexing() 812 if read_data and return_data: File ~/mambaforge/envs/RTP/lib/python3.10/site-packages/hera_cal/io.py:595, in HERAData._determine_blt_slicing(self) 593 def _determine_blt_slicing(self): 594 '''Determine the mapping between antenna pairs and slices of the blt axis of the data_array.''' --> 595 self._blt_slices = get_blt_slices(self) File ~/mambaforge/envs/RTP/lib/python3.10/site-packages/hera_cal/io.py:441, in get_blt_slices(uvo, tried_to_reorder) 439 if not tried_to_reorder: 440 uvo.reorder_blts(order='time') --> 441 return get_blt_slices(uvo, tried_to_reorder=True) 442 else: 443 raise NotImplementedError('UVData objects with non-regular spacing of ' 444 'baselines in its baseline-times are not supported.') File ~/mambaforge/envs/RTP/lib/python3.10/site-packages/hera_cal/io.py:443, in get_blt_slices(uvo, tried_to_reorder) 441 return get_blt_slices(uvo, tried_to_reorder=True) 442 else: --> 443 raise NotImplementedError('UVData objects with non-regular spacing of ' 444 'baselines in its baseline-times are not supported.') 445 else: 446 blt_slices[(ant1, ant2)] = slice(indices[0], indices[-1] + 1, indices[1] - indices[0]) NotImplementedError: UVData objects with non-regular spacing of baselines in its baseline-times are not supported.
# try to load a priori antenna statusesm but fail gracefully if this doesn't work.
a_priori_statuses = {ant: 'Not Found' for ant in ants}
nodes = {ant: np.nan for ant in ants}
try:
from hera_mc import cm_hookup
# get node numbers
hookup = cm_hookup.get_hookup('default')
for ant_name in hookup:
ant = int("".join(filter(str.isdigit, ant_name)))
if ant in nodes:
nodes[ant] = int(hookup[ant_name].get_part_from_type('node')['E<ground'][1:])
# get apriori antenna status
for ant_name, data in hookup.items():
ant = int("".join(filter(str.isdigit, ant_name)))
if ant in a_priori_statuses:
a_priori_statuses[ant] = data.apriori
except Exception as err:
print(f'Could not load node numbers and a priori antenna statuses.\nEncountered {type(err)} with message: {err}')
# print ex_ants for easy copy-pasting to YAML file
print('ex_ants: [' + ", ".join(str(ant) for ant in ex_ants) + ']')
ex_ants: [4, 5, 7, 15, 17, 18, 19, 27, 28, 29, 32, 34, 37, 38, 40, 47, 51, 52, 53, 54, 55, 58, 60, 61, 63, 66, 68, 77, 78, 81, 82, 83, 86, 87, 90, 92, 93, 94, 96, 102, 104, 107, 109, 110, 111, 112, 113, 115, 117, 118, 121, 124, 126, 127, 131, 133, 136, 137, 140, 142, 143, 147, 148, 149, 150, 151, 155, 156, 158, 159, 160, 161, 165, 166, 167, 168, 169, 170, 173, 180, 181, 182, 184, 187, 189, 190, 191, 192, 193, 200, 202, 204, 205, 207, 208, 209, 210, 211, 224, 225, 226, 227, 242, 243, 246, 262, 329]
def Array_Plot():
plt.figure(figsize=(16,16))
plt.scatter(np.array([autos.antpos[ant][0] for ant in ants]),
np.array([autos.antpos[ant][1] for ant in ants]), c='w', s=0)
for ant in ants:
pos = autos.antpos[ant]
bad = ant in ex_ants
plt.gca().add_artist(plt.Circle(tuple(pos[0:2]), radius=7,
fill=(~bad), color=['grey','r'][bad]))
plt.text(pos[0],pos[1],str(ant), va='center', ha='center', color='w')
plt.xlabel("Antenna East-West Position (meters)")
plt.ylabel("Antenna North-South Position (meters)")
plt.title(f'Antenna Positions and Auto_Metrics Flags on {JD}\n(Maximum Modified Z-Score > {mean_round_modz_cut} in Red)');
plt.axis('equal')
plt.tight_layout()
auto_metrics
flags.¶This plot shows the antenna positions of all antennas in the data. The antennas with at least one Modified Z-score for one metric on one polarization exceeding the cut are entirely flagged.
Array_Plot()
--------------------------------------------------------------------------- NameError Traceback (most recent call last) Cell In [10], line 1 ----> 1 Array_Plot() Cell In [9], line 3, in Array_Plot() 1 def Array_Plot(): 2 plt.figure(figsize=(16,16)) ----> 3 plt.scatter(np.array([autos.antpos[ant][0] for ant in ants]), 4 np.array([autos.antpos[ant][1] for ant in ants]), c='w', s=0) 5 for ant in ants: 6 pos = autos.antpos[ant] Cell In [9], line 3, in <listcomp>(.0) 1 def Array_Plot(): 2 plt.figure(figsize=(16,16)) ----> 3 plt.scatter(np.array([autos.antpos[ant][0] for ant in ants]), 4 np.array([autos.antpos[ant][1] for ant in ants]), c='w', s=0) 5 for ant in ants: 6 pos = autos.antpos[ant] NameError: name 'autos' is not defined
<Figure size 1600x1600 with 0 Axes>
# Parse modzs for Table 1 and other figures
modzs_to_check = {'Shape': 'r2_shape_modzs', 'Power': 'r2_power_modzs',
'Temporal Variability': 'r2_temp_var_modzs', 'Temporal Discontinuties': 'r2_temp_diff_modzs'}
worst_metrics = []
worst_zs = []
all_modzs = {}
binary_flags = {rationale: [] for rationale in modzs_to_check}
for ant in ants:
# parse modzs and figure out flag counts
modzs = {f'{pol} {rationale}': am['modzs'][dict_name][(ant, ant, pol)]
for rationale, dict_name in modzs_to_check.items() for pol in autos.pols()}
for pol in autos.pols():
for rationale, dict_name in modzs_to_check.items():
binary_flags[rationale].append(am['modzs'][dict_name][(ant, ant, pol)] > mean_round_modz_cut)
# figure out which metric is the largest outlier
worst_metric, worst_z = max(modzs.items(), key=operator.itemgetter(1))
worst_metrics.append(worst_metric)
worst_zs.append(worst_z)
# parse out all metrics for dataframe
for k in modzs:
col_label = k + ' Modified Z-Score'
if col_label in all_modzs:
all_modzs[col_label].append(modzs[k])
else:
all_modzs[col_label] = [modzs[k]]
--------------------------------------------------------------------------- NameError Traceback (most recent call last) Cell In [11], line 11 7 binary_flags = {rationale: [] for rationale in modzs_to_check} 9 for ant in ants: 10 # parse modzs and figure out flag counts ---> 11 modzs = {f'{pol} {rationale}': am['modzs'][dict_name][(ant, ant, pol)] 12 for rationale, dict_name in modzs_to_check.items() for pol in autos.pols()} 13 for pol in autos.pols(): 14 for rationale, dict_name in modzs_to_check.items(): Cell In [11], line 12, in <dictcomp>(.0) 7 binary_flags = {rationale: [] for rationale in modzs_to_check} 9 for ant in ants: 10 # parse modzs and figure out flag counts 11 modzs = {f'{pol} {rationale}': am['modzs'][dict_name][(ant, ant, pol)] ---> 12 for rationale, dict_name in modzs_to_check.items() for pol in autos.pols()} 13 for pol in autos.pols(): 14 for rationale, dict_name in modzs_to_check.items(): NameError: name 'autos' is not defined
# build dataframe
to_show = {'Ant': ants,
'Node': [f'N{nodes[ant]:02}' for ant in ants],
'A Priori Status': [a_priori_statuses[ant] for ant in ants],
'Worst Metric': worst_metrics, 'Worst Modified Z-Score': worst_zs}
to_show.update(all_modzs)
df = pd.DataFrame(to_show).sort_values('Worst Modified Z-Score', ascending=False)
# style dataframe
z_score_cols = [col for col in df.columns if col not in ['Ant', 'Node', 'A Priori Status', 'Worst Metric']]
table1 = df.style.hide_index()\
.applymap(lambda val: 'font-weight: bold' if val in ex_ants else '', subset=['Ant']) \
.applymap(lambda val: 'color: red' if val in ex_ants else '', subset=['Ant']) \
.applymap(lambda val: f'background-color: {status_colors[val]}' if val in status_colors else '', subset=['A Priori Status']) \
.background_gradient(cmap='viridis', vmax=mean_round_modz_cut * 3, vmin=0, axis=None, subset=z_score_cols) \
.applymap(lambda val: 'font-weight: bold' if val > am['parameters']['mean_round_modz_cut'] else '', subset=z_score_cols) \
.applymap(lambda val: 'color: red' if val > am['parameters']['mean_round_modz_cut'] else '', subset=z_score_cols) \
.set_table_styles([dict(selector="th",props=[('max-width', '70pt')])])
--------------------------------------------------------------------------- ValueError Traceback (most recent call last) Cell In [12], line 7 2 to_show = {'Ant': ants, 3 'Node': [f'N{nodes[ant]:02}' for ant in ants], 4 'A Priori Status': [a_priori_statuses[ant] for ant in ants], 5 'Worst Metric': worst_metrics, 'Worst Modified Z-Score': worst_zs} 6 to_show.update(all_modzs) ----> 7 df = pd.DataFrame(to_show).sort_values('Worst Modified Z-Score', ascending=False) 9 # style dataframe 10 z_score_cols = [col for col in df.columns if col not in ['Ant', 'Node', 'A Priori Status', 'Worst Metric']] File ~/mambaforge/envs/RTP/lib/python3.10/site-packages/pandas/core/frame.py:664, in DataFrame.__init__(self, data, index, columns, dtype, copy) 658 mgr = self._init_mgr( 659 data, axes={"index": index, "columns": columns}, dtype=dtype, copy=copy 660 ) 662 elif isinstance(data, dict): 663 # GH#38939 de facto copy defaults to False only in non-dict cases --> 664 mgr = dict_to_mgr(data, index, columns, dtype=dtype, copy=copy, typ=manager) 665 elif isinstance(data, ma.MaskedArray): 666 import numpy.ma.mrecords as mrecords File ~/mambaforge/envs/RTP/lib/python3.10/site-packages/pandas/core/internals/construction.py:493, in dict_to_mgr(data, index, columns, dtype, typ, copy) 489 else: 490 # dtype check to exclude e.g. range objects, scalars 491 arrays = [x.copy() if hasattr(x, "dtype") else x for x in arrays] --> 493 return arrays_to_mgr(arrays, columns, index, dtype=dtype, typ=typ, consolidate=copy) File ~/mambaforge/envs/RTP/lib/python3.10/site-packages/pandas/core/internals/construction.py:118, in arrays_to_mgr(arrays, columns, index, dtype, verify_integrity, typ, consolidate) 115 if verify_integrity: 116 # figure out the index, if necessary 117 if index is None: --> 118 index = _extract_index(arrays) 119 else: 120 index = ensure_index(index) File ~/mambaforge/envs/RTP/lib/python3.10/site-packages/pandas/core/internals/construction.py:666, in _extract_index(data) 664 lengths = list(set(raw_lengths)) 665 if len(lengths) > 1: --> 666 raise ValueError("All arrays must be of the same length") 668 if have_dicts: 669 raise ValueError( 670 "Mixing dicts with non-Series may lead to ambiguous ordering." 671 ) ValueError: All arrays must be of the same length
This table displays the metrics for each antenna, highlighting which one is the worst. It is sorted by each antenna's worst metric. When one metric exceeds the threshold, auto_metrics
recommends cutting that antenna. Flagged antennas and metrics exceeding the cut are shown in bold and red. Also shown is the antenna's a priori status.
HTML(table1.render())
--------------------------------------------------------------------------- NameError Traceback (most recent call last) Cell In [13], line 1 ----> 1 HTML(table1.render()) NameError: name 'table1' is not defined
def Flag_Bar_Chart():
plt.figure(figsize=(8, 4), dpi=100)
# count
rationales = list(binary_flags.keys())
flags_list = np.array([binary_flags[rationale] for rationale in rationales])
antpol_flags = [np.sum(f) for f in flags_list]
ant_flags = [np.sum(np.array(f)[0::2] | np.array(f)[1::2]) for f in flags_list]
# make bar chart
plt.bar(np.arange(len(rationales)), antpol_flags, width=.7, color='yellow', ec='k', tick_label=rationales)
for x, (nflags, nants) in enumerate(zip(antpol_flags, ant_flags)):
plt.text(x, nflags/2, f'{nflags} Feeds on \n{nants} Antennas\nFlagged', va='center', ha='center')
# set labels
plt.ylabel('Antenna-Polarizations Flagged')
plt.xlabel('Reason for Flagging')
plt.tight_layout()
This bar chart summarizes the number of antenna-polarizations that are statistical outliers in each metric (though often they overlap). Some of these issues occur on both polarizations, so there are fewer unique antennas flagged for each rationale than there are ant-pols flagged, as noted by the labels.
Flag_Bar_Chart()
--------------------------------------------------------------------------- TypeError Traceback (most recent call last) Cell In [15], line 1 ----> 1 Flag_Bar_Chart() Cell In [14], line 8, in Flag_Bar_Chart() 6 flags_list = np.array([binary_flags[rationale] for rationale in rationales]) 7 antpol_flags = [np.sum(f) for f in flags_list] ----> 8 ant_flags = [np.sum(np.array(f)[0::2] | np.array(f)[1::2]) for f in flags_list] 10 # make bar chart 11 plt.bar(np.arange(len(rationales)), antpol_flags, width=.7, color='yellow', ec='k', tick_label=rationales) Cell In [14], line 8, in <listcomp>(.0) 6 flags_list = np.array([binary_flags[rationale] for rationale in rationales]) 7 antpol_flags = [np.sum(f) for f in flags_list] ----> 8 ant_flags = [np.sum(np.array(f)[0::2] | np.array(f)[1::2]) for f in flags_list] 10 # make bar chart 11 plt.bar(np.arange(len(rationales)), antpol_flags, width=.7, color='yellow', ec='k', tick_label=rationales) TypeError: ufunc 'bitwise_or' not supported for the input types, and the inputs could not be safely coerced to any supported types according to the casting rule ''safe''
<Figure size 800x400 with 0 Axes>
def Rationale_Corr_Plot():
plt.figure(figsize=(6,6), dpi=100)
# compute correlation matrix
rationales = list(binary_flags.keys())
flags_list = np.array([binary_flags[rationale] for rationale in rationales])
corrs = np.corrcoef(flags_list)
# plot and label correlation matrix
plt.imshow(corrs, cmap='viridis', interpolation='nearest', origin='upper')
for i in range(corrs.shape[0]):
for j in range(corrs.shape[1]):
plt.text(i, j, np.round(corrs[i, j], 3), va='center', ha='center',
bbox={'facecolor': 'w', 'ec': 'w', 'alpha': .75})
# colorbar, labels, and style
plt.yticks(range(len(rationales)), rationales)
plt.xticks(range(len(rationales)), rationales, rotation=-45, ha='right')
plt.gca().xaxis.tick_top()
plt.clim([0, 1])
plt.colorbar(fraction=0.046, pad=0.04)
plt.tight_layout()
This plot shows the probability that if a given ant-pol is flagged for some reason, it's also flagged for another reason.
Rationale_Corr_Plot()
def plot_all_spectra(spectra, mod_zs, modz_cut, overall, freqs, reason, ex_ants=[],
xlabel='Frequency (MHz)', ylabel='', yscale='linear', ylim_factor=None):
'''Helper function for plotting all spectra and showing which ones were flagged and why.'''
fig, axes = plt.subplots(1,2, figsize=(14,5), dpi=100)
pols = sorted(set([bl[2] for bl in spectra]))
for ax, pol in zip(axes, pols):
# sort antennas into good, bad, and bad but not for this reason
bad_here = [bl for bl in spectra if (bl[2] == pol) and mod_zs[bl] > modz_cut]
other_bad = [bl for bl in spectra if (bl[2] == pol) and (bl[0] in ex_ants) and (bl not in bad_here)]
good = [bl for bl in spectra if (bl[2] == pol) and (bl[0] not in ex_ants)]
# plot all spectra
l1, l2, l3 = None, None, None
for bl in other_bad:
l2, = ax.plot(freqs, spectra[bl], 'darkviolet', lw=.5)
for bl in bad_here:
l1, = ax.plot(freqs, spectra[bl], 'r', lw=.5)
for bl in good:
l3, = ax.plot(freqs, spectra[bl], 'grey', alpha=.5)
l4, = ax.plot(freqs, overall[bl[2]], 'k--')
ax.set_xlabel(xlabel)
ax.set_ylabel(ylabel)
ax.set_yscale(yscale)
if ylim_factor is not None:
ax.set_ylim([np.nanmin([spectra[bl] for bl in good]) / ylim_factor,
np.nanmax([spectra[bl] for bl in good]) * ylim_factor])
ax.set_title(f'Outliers in Autocorrelation {reason}: {pol}')
ax.legend([l1, l2, l3, l4], [f'Flagged for {reason}', 'Flagged for Another Reason', 'Unflagged', 'Average Unflagged'], loc='lower right')
plt.tight_layout()
overall_shape = {pol: np.nanmean([spec for bl, spec in am['spectra']['mean_spectra_normed'].items() if (bl[2] == pol)
and (bl[0] not in ex_ants)], axis=0) for pol in autos.pols()}
overall_power = {pol: np.nanmean([spec for bl, spec in am['spectra']['mean_spectra'].items() if (bl[2] == pol)
and (bl[0] not in ex_ants)], axis=0) for pol in autos.pols()}
overall_temp_var = {pol: np.nanmean([spec for bl, spec in am['spectra']['std_spectra_normed'].items() if (bl[2] == pol)
and (bl[0] not in ex_ants)], axis=0) for pol in autos.pols()}
overall_temp_diff = {pol: np.nanmean([spec for bl, spec in am['spectra']['mean_abs_diff_spectra_normed'].items() if (bl[2] == pol)
and (bl[0] not in ex_ants)], axis=0) for pol in autos.pols()}
--------------------------------------------------------------------------- NameError Traceback (most recent call last) Cell In [19], line 2 1 overall_shape = {pol: np.nanmean([spec for bl, spec in am['spectra']['mean_spectra_normed'].items() if (bl[2] == pol) ----> 2 and (bl[0] not in ex_ants)], axis=0) for pol in autos.pols()} 3 overall_power = {pol: np.nanmean([spec for bl, spec in am['spectra']['mean_spectra'].items() if (bl[2] == pol) 4 and (bl[0] not in ex_ants)], axis=0) for pol in autos.pols()} 5 overall_temp_var = {pol: np.nanmean([spec for bl, spec in am['spectra']['std_spectra_normed'].items() if (bl[2] == pol) 6 and (bl[0] not in ex_ants)], axis=0) for pol in autos.pols()} NameError: name 'autos' is not defined
This plot summarizes the spectra computed to compare to one another to find outliers in autocorrelation shape (see above for how that was computed). The mean compared to is shown as a black dashed line. Antennas in red were flagged as outliers, antennas in gray and purple were not. However, antennas in purple were flagged for some other reason, either another metric or on the other polarization. Completely flagged channels (RFI and band edges) appear as white gaps.
plot_all_spectra(am['spectra']['mean_spectra_normed'], am['modzs']['r2_shape_modzs'], mean_round_modz_cut,
overall_shape, freqs, 'Shape', ex_ants=ex_ants, yscale='linear', ylim_factor=1.2)
--------------------------------------------------------------------------- NameError Traceback (most recent call last) Cell In [20], line 2 1 plot_all_spectra(am['spectra']['mean_spectra_normed'], am['modzs']['r2_shape_modzs'], mean_round_modz_cut, ----> 2 overall_shape, freqs, 'Shape', ex_ants=ex_ants, yscale='linear', ylim_factor=1.2) NameError: name 'overall_shape' is not defined
This plot summarizes the spectra computed to compare to one another to find outliers in autocorrelation amplitude (see above for how that was computed). The mean compared to is shown as a black dashed line. Antennas in red were flagged as outliers, antennas in gray and purple were not. However, antennas in purple were flagged for some other reason, either another metric or on the other polarization. Completely flagged channels (RFI and band edges) appear as white gaps.
plot_all_spectra(am['spectra']['mean_spectra'], am['modzs']['r2_power_modzs'], mean_round_modz_cut,
overall_power, freqs, 'Power', ex_ants=ex_ants, yscale='log')
--------------------------------------------------------------------------- NameError Traceback (most recent call last) Cell In [21], line 2 1 plot_all_spectra(am['spectra']['mean_spectra'], am['modzs']['r2_power_modzs'], mean_round_modz_cut, ----> 2 overall_power, freqs, 'Power', ex_ants=ex_ants, yscale='log') NameError: name 'overall_power' is not defined
This plot summarizes the spectra computed to compare to one another to find outliers in autocorrelation temporal variability (as measured by a standard deviation over time; see above for how that was computed). The mean compared to is shown as a black dashed line. Antennas in red were flagged as outliers, antennas in gray and purple were not. However, antennas in purple were flagged for some other reason, either another metric or on the other polarization. Completely flagged channels (RFI and band edges) appear as white gaps.
plot_all_spectra(am['spectra']['std_spectra_normed'], am['modzs']['r2_temp_var_modzs'], mean_round_modz_cut,
overall_temp_var, freqs, 'Temporal Variability', ex_ants=ex_ants, yscale='log')
--------------------------------------------------------------------------- NameError Traceback (most recent call last) Cell In [22], line 2 1 plot_all_spectra(am['spectra']['std_spectra_normed'], am['modzs']['r2_temp_var_modzs'], mean_round_modz_cut, ----> 2 overall_temp_var, freqs, 'Temporal Variability', ex_ants=ex_ants, yscale='log') NameError: name 'overall_temp_var' is not defined
This plot summarizes the spectra computed to compare to one another to find outliers in autocorrelation temporal discontinuities (as measured by the average absolute integration-to-integration difference over time; see above for how that was computed). The mean compared to is shown as a black dashed line. Antennas in red were flagged as outliers, antennas in gray and purple were not. However, antennas in purple were flagged for some other reason, either another metric or on the other polarization. Completely flagged channels (RFI and band edges) appear as white gaps.
plot_all_spectra(am['spectra']['mean_abs_diff_spectra_normed'], am['modzs']['r2_temp_diff_modzs'], mean_round_modz_cut,
overall_temp_diff, freqs, 'Temporal Discontinutities', ex_ants=ex_ants, yscale='log')
--------------------------------------------------------------------------- NameError Traceback (most recent call last) Cell In [23], line 2 1 plot_all_spectra(am['spectra']['mean_abs_diff_spectra_normed'], am['modzs']['r2_temp_diff_modzs'], mean_round_modz_cut, ----> 2 overall_temp_diff, freqs, 'Temporal Discontinutities', ex_ants=ex_ants, yscale='log') NameError: name 'overall_temp_diff' is not defined
# compute average good autocorrelations for each polarization
avg_good_autos = {pol: np.zeros(wf_shape, dtype=float) for pol in autos.pols()}
for pol in autos.pols():
for i in range(wf_shape[0]):
avg_good_autos[pol][i] = np.mean([np.abs(autos[bl][i, :]) for bl in autos
if (bl[0] not in ex_ants) and (bl[2] == pol)], axis=0)
avg_good_autos[pol][am['flags']] = np.nan
--------------------------------------------------------------------------- NameError Traceback (most recent call last) Cell In [24], line 2 1 # compute average good autocorrelations for each polarization ----> 2 avg_good_autos = {pol: np.zeros(wf_shape, dtype=float) for pol in autos.pols()} 3 for pol in autos.pols(): 4 for i in range(wf_shape[0]): NameError: name 'autos' is not defined
def Avg_Auto_Plot():
fig, axes = plt.subplots(1, 2, figsize=(14,5), dpi=100)
for ax, pol in zip(axes, sorted(autos.pols())):
im = ax.imshow(avg_good_autos[pol], aspect='auto', interpolation='nearest',
extent=[freqs[0], freqs[-1], times[-1], times[0]])
ax.set_yticklabels(np.around(lsts[[min(max(np.searchsorted(times, t), 0), len(times) - 1) for t in ax.get_yticks()]], 2))
plt.colorbar(im, ax=ax)
ax.set_title(f'Average Good Raw {pol} Autocorrelation After Flagging')
ax.set_ylabel('LST (hours)')
ax.set_xlabel('Frequency (MHz)')
plt.tight_layout()
Here we show the waterfalls of the array-averaged autocorrelations over the night, after removing all flagged antennas. We also show the RFI mask generated between the median and mean rounds of antenna outlier detection. This is meant to show that there is little or no RFI remaining to affect the statistics.
Avg_Auto_Plot()
--------------------------------------------------------------------------- NameError Traceback (most recent call last) Cell In [26], line 1 ----> 1 Avg_Auto_Plot() Cell In [25], line 3, in Avg_Auto_Plot() 1 def Avg_Auto_Plot(): 2 fig, axes = plt.subplots(1, 2, figsize=(14,5), dpi=100) ----> 3 for ax, pol in zip(axes, sorted(autos.pols())): 4 im = ax.imshow(avg_good_autos[pol], aspect='auto', interpolation='nearest', 5 extent=[freqs[0], freqs[-1], times[-1], times[0]]) 6 ax.set_yticklabels(np.around(lsts[[min(max(np.searchsorted(times, t), 0), len(times) - 1) for t in ax.get_yticks()]], 2)) NameError: name 'autos' is not defined
def plot_spectra(axes, ant, spec, modzs, modz_cut, overall, reason, yscale='linear'):
'''Helper function for plotting both antennas of a given polarization.'''
for pol, ax in zip(sorted(autos.pols()), axes):
bl = (ant, ant, pol)
# plot good antennas
for bl2 in modzs:
if (bl2[0] not in ex_ants) and (bl2[2] == pol):
ax.plot(freqs, spec[bl2], 'grey', lw=.5, alpha=.5)
ax.plot(freqs, overall[pol], 'k--')
# plot this anetnna
color = 'r'
alpha = .75
if modzs[bl] >= modz_cut:
alpha = 1
elif bl[0] in ex_ants:
color = 'darkviolet'
else:
color = 'darkgreen'
ax.plot(freqs, spec[bl], color, alpha=alpha, label=f'{ant} {pol}\n(z = {np.round(modzs[bl],1)})')
# decorate axis
ax.set_yscale(yscale)
ax.legend(loc=2)
ax.set_title(f'{pol} {reason}')
len(ants)
198
def Plot_All_Auto_Metrics():
for row, ant in enumerate(df['Ant']):
# print title of section
display(HTML(f'<h2>Antenna {ant}: {JD}</h2>'))
# print metrics
df_row = df.loc[df['Ant'] == ant]
df_row = df_row.style.hide_index()\
.set_table_styles([dict(selector="th",props=[('max-width', '70pt')])])\
.applymap(lambda val: f'background-color: {status_colors[val]}' if val in status_colors else '', subset=['A Priori Status']) \
.applymap(lambda val: 'font-weight: bold' if val in ex_ants else '', subset=['Ant']) \
.applymap(lambda val: 'color: red' if val in ex_ants else '', subset=['Ant']) \
.applymap(lambda val: 'font-weight: bold' if val > am['parameters']['mean_round_modz_cut'] else '', subset=z_score_cols) \
.applymap(lambda val: 'color: red' if val > am['parameters']['mean_round_modz_cut'] else '', subset=z_score_cols)
display(HTML(df_row.render()))
# plot spectra and waterfalls
fig, axes = plt.subplots(2, 6, figsize=(18, 5.5), dpi=(40 * (104 / len(ants))**.5)) # this should help manage filesize
# plot individual spectra compared to all good antennas
plot_spectra(axes[:, 0], ant, am['spectra']['mean_spectra_normed'], am['modzs']['r2_shape_modzs'],
mean_round_modz_cut, overall_shape, 'Shape', yscale='linear')
plot_spectra(axes[:, 1], ant, am['spectra']['mean_spectra'], am['modzs']['r2_power_modzs'],
mean_round_modz_cut, overall_power, 'Power', yscale='log')
plot_spectra(axes[:, 2], ant, am['spectra']['std_spectra_normed'], am['modzs']['r2_temp_var_modzs'],
mean_round_modz_cut, overall_temp_var, 'Temporal Variability', yscale='linear')
plot_spectra(axes[:, 3], ant, am['spectra']['mean_abs_diff_spectra_normed'], am['modzs']['r2_temp_diff_modzs'],
mean_round_modz_cut, overall_temp_diff, 'Temporal Discontinutities', yscale='linear')
plt.tight_layout()
# plot linear-scale waterfalls
for pol, ax in zip(sorted(autos.pols()), axes[:, 4]):
bl = (ant, ant, pol)
im = ax.imshow(np.where(am['flags'], np.nan, autos[bl].real),
aspect='auto', interpolation='nearest', cmap='inferno',
extent=[freqs[0], freqs[-1], times[-1], times[0]])
ax.set_yticklabels(np.around(lsts[[min(max(np.searchsorted(times, t), 0), len(times) - 1) for t in ax.get_yticks()]], 2))
ax.set_title(f'{pol} Waterfall (Linear Scale)')
# plot log-scale mean-divided waterfalls
for pol, ax in zip(sorted(autos.pols()), axes[:, 5]):
bl = (ant, ant, pol)
to_plot = autos[bl].real / avg_good_autos[pol]
to_plot[am['flags']] = np.nan
to_plot /= np.nanmean(to_plot)
im = ax.imshow(np.log10(to_plot), aspect='auto', cmap='seismic', interpolation='nearest', vmin=-.07, vmax=.07,
extent=[freqs[0], freqs[-1], times[-1], times[0]])
ax.set_yticklabels(np.around(lsts[[min(max(np.searchsorted(times, t), 0), len(times) - 1) for t in ax.get_yticks()]], 2))
ax.set_title(f'{pol} Log(Normalized Waterfall)')
plt.show()
# print some whitespace
display(HTML('<hr style="height:3px">'))
Here we show the metrics for each antenna and the spectra/waterfalls that hopefully explain what led to them. The table reproduces the information from Table 1 above. The first four panels in each row clearly highlight the antenna's spectrum as it compares to the mean good antenna (black) and the distribution of good antennas (gray). Spectra in red were flagged as outliers. Spectra in purple were flagged for some other reason, either another metric or on the other polarization. Good antennas are shown in green. Completely flagged channels (RFI and band edges) appear as white gaps. In the fifth column, the waterfall of that autocorrelation is shown on a linear scale after RFI/band edge flags (white). In the sixth column, we show the log (base 10) of the same waterfall, divided by the average good antennas' waterfall of that polarization and then normalized to an average of 1.
Plot_All_Auto_Metrics()
--------------------------------------------------------------------------- NameError Traceback (most recent call last) Cell In [30], line 1 ----> 1 Plot_All_Auto_Metrics() Cell In [29], line 2, in Plot_All_Auto_Metrics() 1 def Plot_All_Auto_Metrics(): ----> 2 for row, ant in enumerate(df['Ant']): 3 # print title of section 4 display(HTML(f'<h2>Antenna {ant}: {JD}</h2>')) 6 # print metrics NameError: name 'df' is not defined
from hera_qm import __version__
print(__version__)
2.1.1.dev3+gb291d34