auto_metrics
Nightly Notebook¶By Josh Dillon
Last Updated January 27, 2021
auto_metrics
is a module in hera_qm
that computes a series of statistics on night-long autocorrelation waterfalls to find antenna outliers in shape, power, or temporal structure. In general, these are assessed by collapsing each waterfall to a single (normalized) spectrum, then comparing that spectrum to the mean/median of the unflagged antennas' spectra to compute some difference metric. Those are then converted into modified Z-scores by comparing the overall distribution of good antennas and the worst antenna is flagged if it exceeds some threshold. This whole processes is repeated iteratively until no new bad antennas are identified. This proceeds in two rounds, first with more robust median-based statistics to identify the worst outliers, and then (after an RFI flagging step), a round with mean-based statistics. This notebook examines those mean-based spectra and statistics.
import numpy as np
import pandas as pd
pd.set_option('display.max_rows', 1000)
import matplotlib.pyplot as plt
import matplotlib
import glob
import os
import operator
from hera_cal.io import HERAData
from hera_cal.utils import split_pol
from hera_qm.metrics_io import load_metric_file
from hera_notebook_templates.utils import status_colors
from IPython.display import display, HTML
%matplotlib inline
%config InlineBackend.figure_format = 'retina'
display(HTML("<style>.container { width:100% !important; }</style>"))
# If you want to run this notebook locally, copy the output of the next cell into the first few lines of this cell.
# JD = '2459122'
# data_path = '/lustre/aoc/projects/hera/H4C/2459122'
# os.environ["JULIANDATE"] = JD
# os.environ["DATA_PATH"] = data_path
# Use environment variables to figure out path to data
JD = os.environ['JULIANDATE']
data_path = os.environ['DATA_PATH']
print(f'JD = "{JD}"')
print(f'data_path = "{data_path}"')
JD = "2459764" data_path = "/mnt/sn1/2459764"
from astropy.time import Time
utc = Time(JD, format='jd').datetime
print(f'Date: {utc.month}-{utc.day}-{utc.year}')
Date: 7-3-2022
print(f'Looking for data in {data_path} on JD {JD}...')
auto_metrics_file = sorted(glob.glob(os.path.join(data_path, f'zen.{JD}*.auto_metrics.h5')))
if len(auto_metrics_file) > 0:
auto_metrics_file = auto_metrics_file[0]
print(f'Found auto_metrics results file at {auto_metrics_file}.')
else:
raise OSError(f'{auto_metrics_file} not found.')
raw_auto_files = sorted(glob.glob(os.path.join(data_path, f'zen.{JD}.?????.sum.autos.uvh5')))
if len(raw_auto_files) > 0:
print(f'Found {len(raw_auto_files)} extracted autocorrelation files.')
else:
raise OSError(f'No files of the form zen.{JD}.?????.sum.autos.uvh5 found in {data_path}.')
Looking for data in /mnt/sn1/2459764 on JD 2459764... Found auto_metrics results file at /mnt/sn1/2459764/zen.2459764.21977.sum.auto_metrics.h5. Found 1937 extracted autocorrelation files.
# load auto_metrics and define use someful quantities
am = load_metric_file(auto_metrics_file)
mean_round_modz_cut = am['parameters']['mean_round_modz_cut']
ex_ants = am['ex_ants']['r2_ex_ants']
ants = sorted(set(bl[0] for bl in am['modzs']['r2_shape_modzs']))
# load raw autocorrelation waterfalls and define some useful quantities
hd = HERAData(raw_auto_files)
autos, _, _ = hd.read(axis='blt')
wf_shape = next(iter(autos.values())).shape
freqs = autos.freqs / 1e6
times = autos.times
lsts = autos.lsts * 12 / np.pi
antenna_positions parameter value is array, values are not close
--------------------------------------------------------------------------- ValueError Traceback (most recent call last) Input In [6], in <cell line: 9>() 7 # load raw autocorrelation waterfalls and define some useful quantities 8 hd = HERAData(raw_auto_files) ----> 9 autos, _, _ = hd.read(axis='blt') 10 wf_shape = next(iter(autos.values())).shape 11 freqs = autos.freqs / 1e6 File ~/mambaforge/envs/RTP/lib/python3.10/site-packages/hera_cal/io.py:728, in HERAData.read(self, bls, polarizations, times, time_range, lsts, lst_range, frequencies, freq_chans, axis, read_data, return_data, run_check, check_extra, run_check_acceptability, **kwargs) 726 try: 727 if self.filetype in ['uvh5', 'uvfits']: --> 728 super().read(self.filepaths, file_type=self.filetype, axis=axis, bls=bls, polarizations=polarizations, 729 times=times, time_range=time_range, lsts=lsts, lst_range=lst_range, frequencies=frequencies, 730 freq_chans=freq_chans, read_data=read_data, run_check=run_check, check_extra=check_extra, 731 run_check_acceptability=run_check_acceptability, **kwargs) 732 if self.filetype == 'uvfits': 733 self.unphase_to_drift() File ~/mambaforge/envs/RTP/lib/python3.10/site-packages/pyuvdata/uvdata/uvdata.py:12087, in UVData.read(self, filename, axis, file_type, read_data, skip_bad_files, background_lsts, ignore_name, allow_rephase, phase_center_radec, unphase_to_drift, phase_frame, phase_epoch, orig_phase_frame, phase_use_ant_pos, fix_old_proj, fix_use_ant_pos, make_multi_phase, antenna_nums, antenna_names, ant_str, bls, frequencies, freq_chans, times, time_range, lsts, lst_range, polarizations, blt_inds, keep_all_metadata, run_check, check_extra, run_check_acceptability, strict_uvw_antpos_check, check_autos, fix_autos, phase_type, correct_lat_lon, calc_lst, use_model, data_column, pol_order, ignore_single_chan, raise_error, read_weights, allow_flex_pol, multidim_index, data_array_dtype, use_aoflagger_flags, use_cotter_flags, remove_dig_gains, remove_coarse_band, correct_cable_len, correct_van_vleck, cheby_approx, flag_small_auto_ants, flag_small_sig_ants, propagate_coarse_flags, flag_init, edge_width, start_flag, end_flag, flag_dc_offset, remove_flagged_ants, phase_to_pointing_center, nsample_array_dtype, isource, irec, isb, corrchunk, pseudo_cont, rechunk) 12084 # Concatenate once at end 12085 if axis is not None: 12086 # Rewrote fast_concat to operate on lists > 12087 self.fast_concat( 12088 uv_list, 12089 axis, 12090 phase_center_radec=phase_center_radec, 12091 unphase_to_drift=unphase_to_drift, 12092 phase_frame=phase_frame, 12093 orig_phase_frame=orig_phase_frame, 12094 use_ant_pos=phase_use_ant_pos, 12095 run_check=run_check, 12096 check_extra=check_extra, 12097 run_check_acceptability=run_check_acceptability, 12098 inplace=True, 12099 ignore_name=ignore_name, 12100 ) 12101 else: 12102 # Too much work to rewrite __add__ to operate on lists 12103 # of files, so instead doing a binary tree merge 12104 uv_list = [self] + uv_list File ~/mambaforge/envs/RTP/lib/python3.10/site-packages/pyuvdata/uvdata/uvdata.py:7557, in UVData.fast_concat(self, other, axis, inplace, phase_center_radec, unphase_to_drift, phase_frame, orig_phase_frame, use_ant_pos, verbose_history, run_check, check_extra, run_check_acceptability, strict_uvw_antpos_check, ignore_name) 7551 if not params_match: 7552 msg = ( 7553 "UVParameter " 7554 + a[1:] 7555 + " does not match. Cannot combine objects." 7556 ) -> 7557 raise ValueError(msg) 7559 if axis == "freq": 7560 this.Nfreqs = sum([this.Nfreqs] + [obj.Nfreqs for obj in other]) ValueError: UVParameter antenna_positions does not match. Cannot combine objects.
# try to load a priori antenna statusesm but fail gracefully if this doesn't work.
a_priori_statuses = {ant: 'Not Found' for ant in ants}
nodes = {ant: np.nan for ant in ants}
try:
from hera_mc import cm_hookup
# get node numbers
hookup = cm_hookup.get_hookup('default')
for ant_name in hookup:
ant = int("".join(filter(str.isdigit, ant_name)))
if ant in nodes:
nodes[ant] = int(hookup[ant_name].get_part_from_type('node')['E<ground'][1:])
# get apriori antenna status
for ant_name, data in hookup.items():
ant = int("".join(filter(str.isdigit, ant_name)))
if ant in a_priori_statuses:
a_priori_statuses[ant] = data.apriori
except Exception as err:
print(f'Could not load node numbers and a priori antenna statuses.\nEncountered {type(err)} with message: {err}')
# print ex_ants for easy copy-pasting to YAML file
print('ex_ants: [' + ", ".join(str(ant) for ant in ex_ants) + ']')
ex_ants: [7, 8, 9, 10, 18, 19, 20, 21, 27, 28, 31, 32, 33, 36, 38, 40, 45, 46, 50, 51, 52, 55, 56, 57, 67, 69, 70, 73, 81, 82, 83, 88, 89, 90, 91, 92, 93, 94, 98, 100, 105, 106, 107, 110, 116, 124, 125, 126, 129, 130, 136, 137, 138, 141, 142, 144, 145, 150, 155, 156, 160, 161, 166, 167, 168, 169, 170, 177, 180, 181, 182, 183, 187, 190, 220, 221, 222, 320, 321, 323, 324, 329, 333]
def Array_Plot():
plt.figure(figsize=(16,16))
plt.scatter(np.array([autos.antpos[ant][0] for ant in ants]),
np.array([autos.antpos[ant][1] for ant in ants]), c='w', s=0)
for ant in ants:
pos = autos.antpos[ant]
bad = ant in ex_ants
plt.gca().add_artist(plt.Circle(tuple(pos[0:2]), radius=7,
fill=(~bad), color=['grey','r'][bad]))
plt.text(pos[0],pos[1],str(ant), va='center', ha='center', color='w')
plt.xlabel("Antenna East-West Position (meters)")
plt.ylabel("Antenna North-South Position (meters)")
plt.title(f'Antenna Positions and Auto_Metrics Flags on {JD}\n(Maximum Modified Z-Score > {mean_round_modz_cut} in Red)');
plt.axis('equal')
plt.tight_layout()
auto_metrics
flags.¶This plot shows the antenna positions of all antennas in the data. The antennas with at least one Modified Z-score for one metric on one polarization exceeding the cut are entirely flagged.
Array_Plot()
--------------------------------------------------------------------------- NameError Traceback (most recent call last) Input In [10], in <cell line: 1>() ----> 1 Array_Plot() Input In [9], in Array_Plot() 1 def Array_Plot(): 2 plt.figure(figsize=(16,16)) ----> 3 plt.scatter(np.array([autos.antpos[ant][0] for ant in ants]), 4 np.array([autos.antpos[ant][1] for ant in ants]), c='w', s=0) 5 for ant in ants: 6 pos = autos.antpos[ant] Input In [9], in <listcomp>(.0) 1 def Array_Plot(): 2 plt.figure(figsize=(16,16)) ----> 3 plt.scatter(np.array([autos.antpos[ant][0] for ant in ants]), 4 np.array([autos.antpos[ant][1] for ant in ants]), c='w', s=0) 5 for ant in ants: 6 pos = autos.antpos[ant] NameError: name 'autos' is not defined
<Figure size 1600x1600 with 0 Axes>
# Parse modzs for Table 1 and other figures
modzs_to_check = {'Shape': 'r2_shape_modzs', 'Power': 'r2_power_modzs',
'Temporal Variability': 'r2_temp_var_modzs', 'Temporal Discontinuties': 'r2_temp_diff_modzs'}
worst_metrics = []
worst_zs = []
all_modzs = {}
binary_flags = {rationale: [] for rationale in modzs_to_check}
for ant in ants:
# parse modzs and figure out flag counts
modzs = {f'{pol} {rationale}': am['modzs'][dict_name][(ant, ant, pol)]
for rationale, dict_name in modzs_to_check.items() for pol in autos.pols()}
for pol in autos.pols():
for rationale, dict_name in modzs_to_check.items():
binary_flags[rationale].append(am['modzs'][dict_name][(ant, ant, pol)] > mean_round_modz_cut)
# figure out which metric is the largest outlier
worst_metric, worst_z = max(modzs.items(), key=operator.itemgetter(1))
worst_metrics.append(worst_metric)
worst_zs.append(worst_z)
# parse out all metrics for dataframe
for k in modzs:
col_label = k + ' Modified Z-Score'
if col_label in all_modzs:
all_modzs[col_label].append(modzs[k])
else:
all_modzs[col_label] = [modzs[k]]
--------------------------------------------------------------------------- NameError Traceback (most recent call last) Input In [11], in <cell line: 9>() 7 binary_flags = {rationale: [] for rationale in modzs_to_check} 9 for ant in ants: 10 # parse modzs and figure out flag counts ---> 11 modzs = {f'{pol} {rationale}': am['modzs'][dict_name][(ant, ant, pol)] 12 for rationale, dict_name in modzs_to_check.items() for pol in autos.pols()} 13 for pol in autos.pols(): 14 for rationale, dict_name in modzs_to_check.items(): Input In [11], in <dictcomp>(.0) 7 binary_flags = {rationale: [] for rationale in modzs_to_check} 9 for ant in ants: 10 # parse modzs and figure out flag counts 11 modzs = {f'{pol} {rationale}': am['modzs'][dict_name][(ant, ant, pol)] ---> 12 for rationale, dict_name in modzs_to_check.items() for pol in autos.pols()} 13 for pol in autos.pols(): 14 for rationale, dict_name in modzs_to_check.items(): NameError: name 'autos' is not defined
# build dataframe
to_show = {'Ant': ants,
'Node': [f'N{nodes[ant]:02}' for ant in ants],
'A Priori Status': [a_priori_statuses[ant] for ant in ants],
'Worst Metric': worst_metrics, 'Worst Modified Z-Score': worst_zs}
to_show.update(all_modzs)
df = pd.DataFrame(to_show).sort_values('Worst Modified Z-Score', ascending=False)
# style dataframe
z_score_cols = [col for col in df.columns if col not in ['Ant', 'Node', 'A Priori Status', 'Worst Metric']]
table1 = df.style.hide_index()\
.applymap(lambda val: 'font-weight: bold' if val in ex_ants else '', subset=['Ant']) \
.applymap(lambda val: 'color: red' if val in ex_ants else '', subset=['Ant']) \
.applymap(lambda val: f'background-color: {status_colors[val]}' if val in status_colors else '', subset=['A Priori Status']) \
.background_gradient(cmap='viridis', vmax=mean_round_modz_cut * 3, vmin=0, axis=None, subset=z_score_cols) \
.applymap(lambda val: 'font-weight: bold' if val > am['parameters']['mean_round_modz_cut'] else '', subset=z_score_cols) \
.applymap(lambda val: 'color: red' if val > am['parameters']['mean_round_modz_cut'] else '', subset=z_score_cols) \
.set_table_styles([dict(selector="th",props=[('max-width', '70pt')])])
--------------------------------------------------------------------------- ValueError Traceback (most recent call last) Input In [12], in <cell line: 7>() 2 to_show = {'Ant': ants, 3 'Node': [f'N{nodes[ant]:02}' for ant in ants], 4 'A Priori Status': [a_priori_statuses[ant] for ant in ants], 5 'Worst Metric': worst_metrics, 'Worst Modified Z-Score': worst_zs} 6 to_show.update(all_modzs) ----> 7 df = pd.DataFrame(to_show).sort_values('Worst Modified Z-Score', ascending=False) 9 # style dataframe 10 z_score_cols = [col for col in df.columns if col not in ['Ant', 'Node', 'A Priori Status', 'Worst Metric']] File ~/mambaforge/envs/RTP/lib/python3.10/site-packages/pandas/core/frame.py:636, in DataFrame.__init__(self, data, index, columns, dtype, copy) 630 mgr = self._init_mgr( 631 data, axes={"index": index, "columns": columns}, dtype=dtype, copy=copy 632 ) 634 elif isinstance(data, dict): 635 # GH#38939 de facto copy defaults to False only in non-dict cases --> 636 mgr = dict_to_mgr(data, index, columns, dtype=dtype, copy=copy, typ=manager) 637 elif isinstance(data, ma.MaskedArray): 638 import numpy.ma.mrecords as mrecords File ~/mambaforge/envs/RTP/lib/python3.10/site-packages/pandas/core/internals/construction.py:502, in dict_to_mgr(data, index, columns, dtype, typ, copy) 494 arrays = [ 495 x 496 if not hasattr(x, "dtype") or not isinstance(x.dtype, ExtensionDtype) 497 else x.copy() 498 for x in arrays 499 ] 500 # TODO: can we get rid of the dt64tz special case above? --> 502 return arrays_to_mgr(arrays, columns, index, dtype=dtype, typ=typ, consolidate=copy) File ~/mambaforge/envs/RTP/lib/python3.10/site-packages/pandas/core/internals/construction.py:120, in arrays_to_mgr(arrays, columns, index, dtype, verify_integrity, typ, consolidate) 117 if verify_integrity: 118 # figure out the index, if necessary 119 if index is None: --> 120 index = _extract_index(arrays) 121 else: 122 index = ensure_index(index) File ~/mambaforge/envs/RTP/lib/python3.10/site-packages/pandas/core/internals/construction.py:674, in _extract_index(data) 672 lengths = list(set(raw_lengths)) 673 if len(lengths) > 1: --> 674 raise ValueError("All arrays must be of the same length") 676 if have_dicts: 677 raise ValueError( 678 "Mixing dicts with non-Series may lead to ambiguous ordering." 679 ) ValueError: All arrays must be of the same length
This table displays the metrics for each antenna, highlighting which one is the worst. It is sorted by each antenna's worst metric. When one metric exceeds the threshold, auto_metrics
recommends cutting that antenna. Flagged antennas and metrics exceeding the cut are shown in bold and red. Also shown is the antenna's a priori status.
HTML(table1.render())
--------------------------------------------------------------------------- NameError Traceback (most recent call last) Input In [13], in <cell line: 1>() ----> 1 HTML(table1.render()) NameError: name 'table1' is not defined
def Flag_Bar_Chart():
plt.figure(figsize=(8, 4), dpi=100)
# count
rationales = list(binary_flags.keys())
flags_list = np.array([binary_flags[rationale] for rationale in rationales])
antpol_flags = [np.sum(f) for f in flags_list]
ant_flags = [np.sum(np.array(f)[0::2] | np.array(f)[1::2]) for f in flags_list]
# make bar chart
plt.bar(np.arange(len(rationales)), antpol_flags, width=.7, color='yellow', ec='k', tick_label=rationales)
for x, (nflags, nants) in enumerate(zip(antpol_flags, ant_flags)):
plt.text(x, nflags/2, f'{nflags} Feeds on \n{nants} Antennas\nFlagged', va='center', ha='center')
# set labels
plt.ylabel('Antenna-Polarizations Flagged')
plt.xlabel('Reason for Flagging')
plt.tight_layout()
This bar chart summarizes the number of antenna-polarizations that are statistical outliers in each metric (though often they overlap). Some of these issues occur on both polarizations, so there are fewer unique antennas flagged for each rationale than there are ant-pols flagged, as noted by the labels.
Flag_Bar_Chart()
--------------------------------------------------------------------------- TypeError Traceback (most recent call last) Input In [15], in <cell line: 1>() ----> 1 Flag_Bar_Chart() Input In [14], in Flag_Bar_Chart() 6 flags_list = np.array([binary_flags[rationale] for rationale in rationales]) 7 antpol_flags = [np.sum(f) for f in flags_list] ----> 8 ant_flags = [np.sum(np.array(f)[0::2] | np.array(f)[1::2]) for f in flags_list] 10 # make bar chart 11 plt.bar(np.arange(len(rationales)), antpol_flags, width=.7, color='yellow', ec='k', tick_label=rationales) Input In [14], in <listcomp>(.0) 6 flags_list = np.array([binary_flags[rationale] for rationale in rationales]) 7 antpol_flags = [np.sum(f) for f in flags_list] ----> 8 ant_flags = [np.sum(np.array(f)[0::2] | np.array(f)[1::2]) for f in flags_list] 10 # make bar chart 11 plt.bar(np.arange(len(rationales)), antpol_flags, width=.7, color='yellow', ec='k', tick_label=rationales) TypeError: ufunc 'bitwise_or' not supported for the input types, and the inputs could not be safely coerced to any supported types according to the casting rule ''safe''
<Figure size 800x400 with 0 Axes>
def Rationale_Corr_Plot():
plt.figure(figsize=(6,6), dpi=100)
# compute correlation matrix
rationales = list(binary_flags.keys())
flags_list = np.array([binary_flags[rationale] for rationale in rationales])
corrs = np.corrcoef(flags_list)
# plot and label correlation matrix
plt.imshow(corrs, cmap='viridis', interpolation='nearest', origin='upper')
for i in range(corrs.shape[0]):
for j in range(corrs.shape[1]):
plt.text(i, j, np.round(corrs[i, j], 3), va='center', ha='center',
bbox={'facecolor': 'w', 'ec': 'w', 'alpha': .75})
# colorbar, labels, and style
plt.yticks(range(len(rationales)), rationales)
plt.xticks(range(len(rationales)), rationales, rotation=-45, ha='right')
plt.gca().xaxis.tick_top()
plt.clim([0, 1])
plt.colorbar(fraction=0.046, pad=0.04)
plt.tight_layout()
This plot shows the probability that if a given ant-pol is flagged for some reason, it's also flagged for another reason.
Rationale_Corr_Plot()
def plot_all_spectra(spectra, mod_zs, modz_cut, overall, freqs, reason, ex_ants=[],
xlabel='Frequency (MHz)', ylabel='', yscale='linear', ylim_factor=None):
'''Helper function for plotting all spectra and showing which ones were flagged and why.'''
fig, axes = plt.subplots(1,2, figsize=(14,5), dpi=100)
pols = sorted(set([bl[2] for bl in spectra]))
for ax, pol in zip(axes, pols):
# sort antennas into good, bad, and bad but not for this reason
bad_here = [bl for bl in spectra if (bl[2] == pol) and mod_zs[bl] > modz_cut]
other_bad = [bl for bl in spectra if (bl[2] == pol) and (bl[0] in ex_ants) and (bl not in bad_here)]
good = [bl for bl in spectra if (bl[2] == pol) and (bl[0] not in ex_ants)]
# plot all spectra
l1, l2, l3 = None, None, None
for bl in other_bad:
l2, = ax.plot(freqs, spectra[bl], 'darkviolet', lw=.5)
for bl in bad_here:
l1, = ax.plot(freqs, spectra[bl], 'r', lw=.5)
for bl in good:
l3, = ax.plot(freqs, spectra[bl], 'grey', alpha=.5)
l4, = ax.plot(freqs, overall[bl[2]], 'k--')
ax.set_xlabel(xlabel)
ax.set_ylabel(ylabel)
ax.set_yscale(yscale)
if ylim_factor is not None:
ax.set_ylim([np.nanmin([spectra[bl] for bl in good]) / ylim_factor,
np.nanmax([spectra[bl] for bl in good]) * ylim_factor])
ax.set_title(f'Outliers in Autocorrelation {reason}: {pol}')
ax.legend([l1, l2, l3, l4], [f'Flagged for {reason}', 'Flagged for Another Reason', 'Unflagged', 'Average Unflagged'], loc='lower right')
plt.tight_layout()
overall_shape = {pol: np.nanmean([spec for bl, spec in am['spectra']['mean_spectra_normed'].items() if (bl[2] == pol)
and (bl[0] not in ex_ants)], axis=0) for pol in autos.pols()}
overall_power = {pol: np.nanmean([spec for bl, spec in am['spectra']['mean_spectra'].items() if (bl[2] == pol)
and (bl[0] not in ex_ants)], axis=0) for pol in autos.pols()}
overall_temp_var = {pol: np.nanmean([spec for bl, spec in am['spectra']['std_spectra_normed'].items() if (bl[2] == pol)
and (bl[0] not in ex_ants)], axis=0) for pol in autos.pols()}
overall_temp_diff = {pol: np.nanmean([spec for bl, spec in am['spectra']['mean_abs_diff_spectra_normed'].items() if (bl[2] == pol)
and (bl[0] not in ex_ants)], axis=0) for pol in autos.pols()}
--------------------------------------------------------------------------- NameError Traceback (most recent call last) Input In [19], in <cell line: 1>() 1 overall_shape = {pol: np.nanmean([spec for bl, spec in am['spectra']['mean_spectra_normed'].items() if (bl[2] == pol) ----> 2 and (bl[0] not in ex_ants)], axis=0) for pol in autos.pols()} 3 overall_power = {pol: np.nanmean([spec for bl, spec in am['spectra']['mean_spectra'].items() if (bl[2] == pol) 4 and (bl[0] not in ex_ants)], axis=0) for pol in autos.pols()} 5 overall_temp_var = {pol: np.nanmean([spec for bl, spec in am['spectra']['std_spectra_normed'].items() if (bl[2] == pol) 6 and (bl[0] not in ex_ants)], axis=0) for pol in autos.pols()} NameError: name 'autos' is not defined
This plot summarizes the spectra computed to compare to one another to find outliers in autocorrelation shape (see above for how that was computed). The mean compared to is shown as a black dashed line. Antennas in red were flagged as outliers, antennas in gray and purple were not. However, antennas in purple were flagged for some other reason, either another metric or on the other polarization. Completely flagged channels (RFI and band edges) appear as white gaps.
plot_all_spectra(am['spectra']['mean_spectra_normed'], am['modzs']['r2_shape_modzs'], mean_round_modz_cut,
overall_shape, freqs, 'Shape', ex_ants=ex_ants, yscale='linear', ylim_factor=1.2)
--------------------------------------------------------------------------- NameError Traceback (most recent call last) Input In [20], in <cell line: 1>() 1 plot_all_spectra(am['spectra']['mean_spectra_normed'], am['modzs']['r2_shape_modzs'], mean_round_modz_cut, ----> 2 overall_shape, freqs, 'Shape', ex_ants=ex_ants, yscale='linear', ylim_factor=1.2) NameError: name 'overall_shape' is not defined
This plot summarizes the spectra computed to compare to one another to find outliers in autocorrelation amplitude (see above for how that was computed). The mean compared to is shown as a black dashed line. Antennas in red were flagged as outliers, antennas in gray and purple were not. However, antennas in purple were flagged for some other reason, either another metric or on the other polarization. Completely flagged channels (RFI and band edges) appear as white gaps.
plot_all_spectra(am['spectra']['mean_spectra'], am['modzs']['r2_power_modzs'], mean_round_modz_cut,
overall_power, freqs, 'Power', ex_ants=ex_ants, yscale='log')
--------------------------------------------------------------------------- NameError Traceback (most recent call last) Input In [21], in <cell line: 1>() 1 plot_all_spectra(am['spectra']['mean_spectra'], am['modzs']['r2_power_modzs'], mean_round_modz_cut, ----> 2 overall_power, freqs, 'Power', ex_ants=ex_ants, yscale='log') NameError: name 'overall_power' is not defined
This plot summarizes the spectra computed to compare to one another to find outliers in autocorrelation temporal variability (as measured by a standard deviation over time; see above for how that was computed). The mean compared to is shown as a black dashed line. Antennas in red were flagged as outliers, antennas in gray and purple were not. However, antennas in purple were flagged for some other reason, either another metric or on the other polarization. Completely flagged channels (RFI and band edges) appear as white gaps.
plot_all_spectra(am['spectra']['std_spectra_normed'], am['modzs']['r2_temp_var_modzs'], mean_round_modz_cut,
overall_temp_var, freqs, 'Temporal Variability', ex_ants=ex_ants, yscale='log')
--------------------------------------------------------------------------- NameError Traceback (most recent call last) Input In [22], in <cell line: 1>() 1 plot_all_spectra(am['spectra']['std_spectra_normed'], am['modzs']['r2_temp_var_modzs'], mean_round_modz_cut, ----> 2 overall_temp_var, freqs, 'Temporal Variability', ex_ants=ex_ants, yscale='log') NameError: name 'overall_temp_var' is not defined
This plot summarizes the spectra computed to compare to one another to find outliers in autocorrelation temporal discontinuities (as measured by the average absolute integration-to-integration difference over time; see above for how that was computed). The mean compared to is shown as a black dashed line. Antennas in red were flagged as outliers, antennas in gray and purple were not. However, antennas in purple were flagged for some other reason, either another metric or on the other polarization. Completely flagged channels (RFI and band edges) appear as white gaps.
plot_all_spectra(am['spectra']['mean_abs_diff_spectra_normed'], am['modzs']['r2_temp_diff_modzs'], mean_round_modz_cut,
overall_temp_diff, freqs, 'Temporal Discontinutities', ex_ants=ex_ants, yscale='log')
--------------------------------------------------------------------------- NameError Traceback (most recent call last) Input In [23], in <cell line: 1>() 1 plot_all_spectra(am['spectra']['mean_abs_diff_spectra_normed'], am['modzs']['r2_temp_diff_modzs'], mean_round_modz_cut, ----> 2 overall_temp_diff, freqs, 'Temporal Discontinutities', ex_ants=ex_ants, yscale='log') NameError: name 'overall_temp_diff' is not defined
# compute average good autocorrelations for each polarization
avg_good_autos = {pol: np.zeros(wf_shape, dtype=float) for pol in autos.pols()}
for pol in autos.pols():
for i in range(wf_shape[0]):
avg_good_autos[pol][i] = np.mean([np.abs(autos[bl][i, :]) for bl in autos
if (bl[0] not in ex_ants) and (bl[2] == pol)], axis=0)
avg_good_autos[pol][am['flags']] = np.nan
--------------------------------------------------------------------------- NameError Traceback (most recent call last) Input In [24], in <cell line: 2>() 1 # compute average good autocorrelations for each polarization ----> 2 avg_good_autos = {pol: np.zeros(wf_shape, dtype=float) for pol in autos.pols()} 3 for pol in autos.pols(): 4 for i in range(wf_shape[0]): NameError: name 'autos' is not defined
def Avg_Auto_Plot():
fig, axes = plt.subplots(1, 2, figsize=(14,5), dpi=100)
for ax, pol in zip(axes, sorted(autos.pols())):
im = ax.imshow(avg_good_autos[pol], aspect='auto', interpolation='nearest',
extent=[freqs[0], freqs[-1], times[-1], times[0]])
ax.set_yticklabels(np.around(lsts[[min(max(np.searchsorted(times, t), 0), len(times) - 1) for t in ax.get_yticks()]], 2))
plt.colorbar(im, ax=ax)
ax.set_title(f'Average Good Raw {pol} Autocorrelation After Flagging')
ax.set_ylabel('LST (hours)')
ax.set_xlabel('Frequency (MHz)')
plt.tight_layout()
Here we show the waterfalls of the array-averaged autocorrelations over the night, after removing all flagged antennas. We also show the RFI mask generated between the median and mean rounds of antenna outlier detection. This is meant to show that there is little or no RFI remaining to affect the statistics.
Avg_Auto_Plot()
--------------------------------------------------------------------------- NameError Traceback (most recent call last) Input In [26], in <cell line: 1>() ----> 1 Avg_Auto_Plot() Input In [25], in Avg_Auto_Plot() 1 def Avg_Auto_Plot(): 2 fig, axes = plt.subplots(1, 2, figsize=(14,5), dpi=100) ----> 3 for ax, pol in zip(axes, sorted(autos.pols())): 4 im = ax.imshow(avg_good_autos[pol], aspect='auto', interpolation='nearest', 5 extent=[freqs[0], freqs[-1], times[-1], times[0]]) 6 ax.set_yticklabels(np.around(lsts[[min(max(np.searchsorted(times, t), 0), len(times) - 1) for t in ax.get_yticks()]], 2)) NameError: name 'autos' is not defined
def plot_spectra(axes, ant, spec, modzs, modz_cut, overall, reason, yscale='linear'):
'''Helper function for plotting both antennas of a given polarization.'''
for pol, ax in zip(sorted(autos.pols()), axes):
bl = (ant, ant, pol)
# plot good antennas
for bl2 in modzs:
if (bl2[0] not in ex_ants) and (bl2[2] == pol):
ax.plot(freqs, spec[bl2], 'grey', lw=.5, alpha=.5)
ax.plot(freqs, overall[pol], 'k--')
# plot this anetnna
color = 'r'
alpha = .75
if modzs[bl] >= modz_cut:
alpha = 1
elif bl[0] in ex_ants:
color = 'darkviolet'
else:
color = 'darkgreen'
ax.plot(freqs, spec[bl], color, alpha=alpha, label=f'{ant} {pol}\n(z = {np.round(modzs[bl],1)})')
# decorate axis
ax.set_yscale(yscale)
ax.legend(loc=2)
ax.set_title(f'{pol} {reason}')
len(ants)
128
def Plot_All_Auto_Metrics():
for row, ant in enumerate(df['Ant']):
# print title of section
display(HTML(f'<h2>Antenna {ant}: {JD}</h2>'))
# print metrics
df_row = df.loc[df['Ant'] == ant]
df_row = df_row.style.hide_index()\
.set_table_styles([dict(selector="th",props=[('max-width', '70pt')])])\
.applymap(lambda val: f'background-color: {status_colors[val]}' if val in status_colors else '', subset=['A Priori Status']) \
.applymap(lambda val: 'font-weight: bold' if val in ex_ants else '', subset=['Ant']) \
.applymap(lambda val: 'color: red' if val in ex_ants else '', subset=['Ant']) \
.applymap(lambda val: 'font-weight: bold' if val > am['parameters']['mean_round_modz_cut'] else '', subset=z_score_cols) \
.applymap(lambda val: 'color: red' if val > am['parameters']['mean_round_modz_cut'] else '', subset=z_score_cols)
display(HTML(df_row.render()))
# plot spectra and waterfalls
fig, axes = plt.subplots(2, 6, figsize=(18, 5.5), dpi=(50 * (104 / len(ants))**.5)) # this should help manage filesize
# plot individual spectra compared to all good antennas
plot_spectra(axes[:, 0], ant, am['spectra']['mean_spectra_normed'], am['modzs']['r2_shape_modzs'],
mean_round_modz_cut, overall_shape, 'Shape', yscale='linear')
plot_spectra(axes[:, 1], ant, am['spectra']['mean_spectra'], am['modzs']['r2_power_modzs'],
mean_round_modz_cut, overall_power, 'Power', yscale='log')
plot_spectra(axes[:, 2], ant, am['spectra']['std_spectra_normed'], am['modzs']['r2_temp_var_modzs'],
mean_round_modz_cut, overall_temp_var, 'Temporal Variability', yscale='linear')
plot_spectra(axes[:, 3], ant, am['spectra']['mean_abs_diff_spectra_normed'], am['modzs']['r2_temp_diff_modzs'],
mean_round_modz_cut, overall_temp_diff, 'Temporal Discontinutities', yscale='linear')
plt.tight_layout()
# plot linear-scale waterfalls
for pol, ax in zip(sorted(autos.pols()), axes[:, 4]):
bl = (ant, ant, pol)
im = ax.imshow(np.where(am['flags'], np.nan, autos[bl].real),
aspect='auto', interpolation='nearest', cmap='inferno',
extent=[freqs[0], freqs[-1], times[-1], times[0]])
ax.set_yticklabels(np.around(lsts[[min(max(np.searchsorted(times, t), 0), len(times) - 1) for t in ax.get_yticks()]], 2))
ax.set_title(f'{pol} Waterfall (Linear Scale)')
# plot log-scale mean-divided waterfalls
for pol, ax in zip(sorted(autos.pols()), axes[:, 5]):
bl = (ant, ant, pol)
to_plot = autos[bl].real / avg_good_autos[pol]
to_plot[am['flags']] = np.nan
to_plot /= np.nanmean(to_plot)
im = ax.imshow(np.log10(to_plot), aspect='auto', cmap='seismic', interpolation='nearest', vmin=-.07, vmax=.07,
extent=[freqs[0], freqs[-1], times[-1], times[0]])
ax.set_yticklabels(np.around(lsts[[min(max(np.searchsorted(times, t), 0), len(times) - 1) for t in ax.get_yticks()]], 2))
ax.set_title(f'{pol} Log(Normalized Waterfall)')
plt.show()
# print some whitespace
display(HTML('<hr style="height:3px">'))
Here we show the metrics for each antenna and the spectra/waterfalls that hopefully explain what led to them. The table reproduces the information from Table 1 above. The first four panels in each row clearly highlight the antenna's spectrum as it compares to the mean good antenna (black) and the distribution of good antennas (gray). Spectra in red were flagged as outliers. Spectra in purple were flagged for some other reason, either another metric or on the other polarization. Good antennas are shown in green. Completely flagged channels (RFI and band edges) appear as white gaps. In the fifth column, the waterfall of that autocorrelation is shown on a linear scale after RFI/band edge flags (white). In the sixth column, we show the log (base 10) of the same waterfall, divided by the average good antennas' waterfall of that polarization and then normalized to an average of 1.
Plot_All_Auto_Metrics()
--------------------------------------------------------------------------- NameError Traceback (most recent call last) Input In [30], in <cell line: 1>() ----> 1 Plot_All_Auto_Metrics() Input In [29], in Plot_All_Auto_Metrics() 1 def Plot_All_Auto_Metrics(): ----> 2 for row, ant in enumerate(df['Ant']): 3 # print title of section 4 display(HTML(f'<h2>Antenna {ant}: {JD}</h2>')) 6 # print metrics NameError: name 'df' is not defined
from hera_qm import __version__
print(__version__)
2.0.4.dev8+g8ce4cac