Josh Dillon, Last Revised February 2021
This notebook brings together as much information as possible from ant_metrics
, auto_metrics
and redcal
to help figure out which antennas are working properly and summarizes it in a single giant table. It is meant to be lightweight and re-run as often as necessary over the night, so it can be run when any of those is done and then be updated when another one completes.
import os
os.environ['HDF5_USE_FILE_LOCKING'] = 'FALSE'
import numpy as np
import matplotlib.pyplot as plt
import matplotlib
import pandas as pd
pd.set_option('display.max_rows', 1000)
from hera_qm.metrics_io import load_metric_file
from hera_cal import utils, io, redcal
import glob
import h5py
from copy import deepcopy
from IPython.display import display, HTML
from hera_notebook_templates.utils import status_colors
from hera_mc import mc
from pyuvdata import UVData
%matplotlib inline
%config InlineBackend.figure_format = 'retina'
display(HTML("<style>.container { width:100% !important; }</style>"))
# If you want to run this notebook locally, copy the output of the next cell into the first few lines of this cell.
# JD = "2459122"
# data_path = '/lustre/aoc/projects/hera/H4C/2459122'
# ant_metrics_ext = ".ant_metrics.hdf5"
# redcal_ext = ".maybe_good.omni.calfits"
# nb_outdir = '/lustre/aoc/projects/hera/H4C/h4c_software/H4C_Notebooks/_rtp_summary_'
# good_statuses = "digital_ok,calibration_maintenance,calibration_triage,calibration_ok"
# os.environ["JULIANDATE"] = JD
# os.environ["DATA_PATH"] = data_path
# os.environ["ANT_METRICS_EXT"] = ant_metrics_ext
# os.environ["REDCAL_EXT"] = redcal_ext
# os.environ["NB_OUTDIR"] = nb_outdir
# os.environ["GOOD_STATUSES"] = good_statuses
# Use environment variables to figure out path to data
JD = os.environ['JULIANDATE']
data_path = os.environ['DATA_PATH']
ant_metrics_ext = os.environ['ANT_METRICS_EXT']
redcal_ext = os.environ['REDCAL_EXT']
nb_outdir = os.environ['NB_OUTDIR']
good_statuses = os.environ['GOOD_STATUSES']

# Echo every setting as a ready-to-paste Python assignment (see the commented-out cell above)
for _setting, _value in [('JD', JD),
                         ('data_path', data_path),
                         ('ant_metrics_ext', ant_metrics_ext),
                         ('redcal_ext', redcal_ext),
                         ('nb_outdir', nb_outdir),
                         ('good_statuses', good_statuses)]:
    print(f'{_setting} = "{_value}"')
JD = "2459811" data_path = "/mnt/sn1/2459811" ant_metrics_ext = ".ant_metrics.hdf5" redcal_ext = ".known_good.omni.calfits" nb_outdir = "/home/obs/src/H5C_Notebooks/_rtp_summary_" good_statuses = "digital_ok,calibration_maintenance,calibration_triage,calibration_ok"
from astropy.time import Time
# Interpret JD as a Julian date and print the corresponding calendar date as month-day-year
utc = Time(JD, format='jd').datetime
print(f'Date: {utc.month}-{utc.day}-{utc.year}')
Date: 8-19-2022
# Per-season options
def ant_to_report_url(ant):
    """Return the htmlpreview.github.io link to the per-antenna report page for `ant`.

    `ant` is interpolated into the report filename as-is (antenna_{ant}_report.html).
    """
    preview_prefix = 'https://htmlpreview.github.io/?'
    report_page = ('https://github.com/HERA-Team/H6C_Notebooks/blob/main/antenna_report/'
                   f'antenna_{ant}_report.html')
    return preview_prefix + report_page
use_auto_metrics = False

# find the auto_metrics file
glob_str = os.path.join(data_path, f'zen.{JD}*.auto_metrics.h5')
auto_metrics_file = sorted(glob.glob(glob_str))

# if it exists, load and extract relevant information
if not auto_metrics_file:
    print(f'No files found matching glob {glob_str}. Skipping auto_metrics.')
else:
    # only the first match (in sorted order) is used
    auto_metrics_file = auto_metrics_file[0]
    print(f'Found auto_metrics results file at {auto_metrics_file}.')
    auto_metrics = load_metric_file(auto_metrics_file)
    mean_round_modz_cut = auto_metrics['parameters']['mean_round_modz_cut']
    auto_ex_ants = auto_metrics['ex_ants']['r2_ex_ants']
    use_auto_metrics = True
No files found matching glob /mnt/sn1/2459811/zen.2459811*.auto_metrics.h5. Skipping auto_metrics.
use_ant_metrics = False

# get a list of all ant_metrics files
glob_str = os.path.join(data_path, f'zen.{JD}.?????.sum{ant_metrics_ext}')
ant_metrics_files = sorted(glob.glob(glob_str))

# if they exist, load as many of them as possible
if len(ant_metrics_files) > 0:
    print(f'Found {len(ant_metrics_files)} ant_metrics files matching glob {glob_str}')

    # all of these are keyed by ant_metrics filename
    ant_metrics_apriori_exants = {}
    ant_metrics_xants_dict = {}
    ant_metrics_dead_ants_dict = {}
    ant_metrics_crossed_ants_dict = {}
    ant_metrics_dead_metrics = {}
    ant_metrics_crossed_metrics = {}
    dead_cuts = {}
    crossed_cuts = {}

    for amf in ant_metrics_files:
        with h5py.File(amf, "r") as infile:  # use h5py directly since it's much faster than load_metric_file
            metrics = infile['Metrics']

            # get out results for this file
            dead_cuts[amf] = metrics['dead_ant_cut'][()]
            crossed_cuts[amf] = metrics['cross_pol_cut'][()]
            xants = metrics['xants'][:]
            dead_ants = metrics['dead_ants'][:]
            crossed_ants = metrics['crossed_ants'][:]

            try:
                # look for ex_ants in history: the text between '--apriori_xants' and the next '--'
                ex_ants_string = infile['Header']['history'][()].decode()
                ex_ants_string = ex_ants_string.split('--apriori_xants')[1]
                ex_ants_string = ex_ants_string.split('--')[0].strip()
            except Exception:
                # best effort: a missing or unparseable history just means no a priori ex_ants
                ex_ants_string = ''

            # This only works for the new correlation-matrix-based ant_metrics
            final_metrics = metrics['final_metrics']
            if 'corr' not in final_metrics or 'corrXPol' not in final_metrics:
                raise KeyError(f"{amf} has no 'corr' and/or 'corrXPol' entries in Metrics/final_metrics; "
                               "only correlation-matrix-based ant_metrics files are supported.")
            # NOTE(review): eval() is run on key strings read from the file; if these are plain
            # tuple literals, ast.literal_eval would be the safer choice -- confirm before switching.
            ant_metrics_dead_metrics[amf] = {eval(ant): final_metrics['corr'][ant][()]
                                             for ant in final_metrics['corr']}
            ant_metrics_crossed_metrics[amf] = {eval(ant): final_metrics['corrXPol'][ant][()]
                                                for ant in final_metrics['corrXPol']}

        # organize results by file
        ant_metrics_xants_dict[amf] = [(int(ant[0]), ant[1].decode()) for ant in xants]
        ant_metrics_dead_ants_dict[amf] = [(int(ant[0]), ant[1].decode()) for ant in dead_ants]
        ant_metrics_crossed_ants_dict[amf] = [(int(ant[0]), ant[1].decode()) for ant in crossed_ants]
        ant_metrics_apriori_exants[amf] = [int(ant) for ant in ex_ants_string.split()]

    # use the median cut over all files as the representative threshold
    dead_cut = np.median(list(dead_cuts.values()))
    crossed_cut = np.median(list(crossed_cuts.values()))

    use_ant_metrics = True
else:
    print(f'No files found matching glob {glob_str}. Skipping ant_metrics.')
Found 442 ant_metrics files matching glob /mnt/sn1/2459811/zen.2459811.?????.sum.ant_metrics.hdf5
--------------------------------------------------------------------------- NameError Traceback (most recent call last) Input In [7], in <cell line: 8>() 38 ant_metrics_crossed_metrics[amf] = {eval(ant): infile['Metrics']['final_metrics']['corrXPol'][ant][()] 39 for ant in infile['Metrics']['final_metrics']['corrXPol']} 40 else: ---> 41 raise(KeywordError) 43 # organize results by file 44 ant_metrics_xants_dict[amf] = [(int(ant[0]), ant[1].decode()) for ant in xants] NameError: name 'KeywordError' is not defined
use_redcal = False

# get a list of all redcal calibration files
glob_str = os.path.join(data_path, f'zen.{JD}.?????.sum{redcal_ext}')
redcal_files = sorted(glob.glob(glob_str))

if len(redcal_files) > 0:
    print(f'Found {len(redcal_files)} redcal files matching glob {glob_str}')

    # all of these are keyed by calibration filename
    post_redcal_ant_flags_dict = {}
    flagged_by_redcal_dict = {}
    cspa_med_dict = {}

    for cal in redcal_files:
        hc = io.HERACal(cal)
        _, flags, cspa, chisq = hc.read()
        cspa_med_dict[cal] = {ant: np.nanmedian(cspa[ant], axis=1) for ant in cspa}
        # an antenna counts as flagged in this file only if every one of its flags is set
        post_redcal_ant_flags_dict[cal] = {ant: np.all(flags[ant]) for ant in flags}

        # check history to distinguish antennas flagged going into redcal from ones flagged during redcal
        tossed_antenna_lines = hc.history.replace('\n','').split('Throwing out antenna ')[1:]
        flagged_by_redcal_dict[cal] = sorted([int(line.split(' ')[0]) for line in tossed_antenna_lines])

    use_redcal = True
else:
    print(f'No files found matching glob {glob_str}. Skipping redcal chisq.')
No files found matching glob /mnt/sn1/2459811/zen.2459811.?????.sum.known_good.omni.calfits. Skipping redcal chisq.
# Parse some general array properties, taking into account the fact that we might be missing some of the metrics
ants = []
pols = []
antpol_pairs = []

if use_auto_metrics:
    # keys of the modz dictionaries are indexed as bl[0] (antenna) and bl[2] (polarization)
    shape_modz_keys = auto_metrics['modzs']['r2_shape_modzs']
    ants = sorted({bl[0] for bl in shape_modz_keys})
    pols = sorted({bl[2] for bl in shape_modz_keys})

if use_ant_metrics:
    # union of every (ant, antpol) key seen in any ant_metrics file
    antpol_pairs = sorted({antpol for dms in ant_metrics_dead_metrics.values() for antpol in dms.keys()})
    antpols = sorted({antpol[1] for antpol in antpol_pairs})
    ants = sorted({antpol[0] for antpol in antpol_pairs} | set(ants))
    pols = sorted({utils.join_pol(ap, ap) for ap in antpols} | set(pols))

if use_redcal:
    # fold in whatever (ant, antpol) keys redcal reports on top of those already found
    antpol_pairs = sorted({ant for cspa in cspa_med_dict.values() for ant in cspa.keys()} | set(antpol_pairs))
    antpols = sorted({antpol[1] for antpol in antpol_pairs})
    ants = sorted({antpol[0] for antpol in antpol_pairs} | set(ants))
    pols = sorted({utils.join_pol(ap, ap) for ap in antpols} | set(pols))

# Figure out remaining antennas not in data and also LST range
data_files = sorted(glob.glob(os.path.join(data_path, 'zen.*.sum.uvh5')))
hd = io.HERAData(data_files[0])
unused_ants = [ant for ant in hd.antpos if ant not in ants]
hd_last = io.HERAData(data_files[-1])
# try to load a priori antenna statuses but fail gracefully if this doesn't work.
a_priori_statuses = dict.fromkeys(ants, 'Not Found')
nodes = dict.fromkeys(ants + unused_ants, np.nan)

try:
    from hera_mc import cm_hookup

    # get node numbers
    hookup = cm_hookup.get_hookup('default')
    for ant_name in hookup:
        # the antenna number is made of the digits in the hookup key
        ant = int("".join(ch for ch in ant_name if ch.isdigit()))
        if ant in nodes:
            node_part = hookup[ant_name].get_part_from_type('node')['E<ground']
            if node_part is not None:
                # drop the first character of the part name and keep the rest as the node number
                nodes[ant] = int(node_part[1:])

    # get apriori antenna status
    for ant_name, data in hookup.items():
        ant = int("".join(ch for ch in ant_name if ch.isdigit()))
        if ant in a_priori_statuses:
            a_priori_statuses[ant] = data.apriori

except Exception as err:
    print(f'Could not load node numbers and a priori antenna statuses.\nEncountered {type(err)} with message: {err}')
if use_auto_metrics:
    # Parse modzs
    modzs_to_check = {'Shape': 'r2_shape_modzs', 'Power': 'r2_power_modzs',
                      'Temporal Variability': 'r2_temp_var_modzs', 'Temporal Discontinuties': 'r2_temp_diff_modzs'}
    worst_metrics = []
    worst_zs = []
    all_modzs = {}
    binary_flags = {rationale: [] for rationale in modzs_to_check}

    for ant in ants:
        # parse modzs and figure out flag counts
        modzs = {}
        for rationale, dict_name in modzs_to_check.items():
            for pol in pols:
                this_modz = auto_metrics['modzs'][dict_name][(ant, ant, pol)]
                modzs[f'{pol} {rationale}'] = this_modz
                binary_flags[rationale].append(this_modz > mean_round_modz_cut)

        # parse out all metrics for dataframe: one list per column, one entry per antenna
        for label, this_modz in modzs.items():
            all_modzs.setdefault(label + ' Modified Z-Score', []).append(this_modz)

    mean_round_modz_cut = auto_metrics['parameters']['mean_round_modz_cut']
else:
    mean_round_modz_cut = 0
if use_ant_metrics:
    # per-file results, pulled out of the filename-keyed dictionaries once
    apriori_per_file = list(ant_metrics_apriori_exants.values())
    dead_ants_per_file = list(ant_metrics_dead_ants_dict.values())
    crossed_ants_per_file = list(ant_metrics_crossed_ants_dict.values())
    xants_per_file = list(ant_metrics_xants_dict.values())
    dead_metrics_per_file = list(ant_metrics_dead_metrics.values())
    crossed_metrics_per_file = list(ant_metrics_crossed_metrics.values())

    # fraction of files in which each antenna appears in the a priori ex_ants
    a_priori_flag_frac = {ant: np.mean([ant in apxa for apxa in apriori_per_file]) for ant in ants}

    # fraction of files in which each (ant, antpol) is listed as dead
    dead_ant_frac = {ap: {ant: np.mean([(ant, ap) in das for das in dead_ants_per_file])
                          for ant in ants}
                     for ap in antpols}

    # fraction of files in which either antpol of the antenna is listed as crossed
    crossed_ant_frac = {ant: np.mean([np.any([(ant, ap) in cas for ap in antpols])
                                      for cas in crossed_ants_per_file])
                        for ant in ants}

    # fraction of files in which ant_metrics excluded the antpol / any antpol of the antenna
    ant_metrics_xants_frac_by_antpol = {antpol: np.mean([antpol in amx for amx in xants_per_file])
                                        for antpol in antpol_pairs}
    ant_metrics_xants_frac_by_ant = {ant: np.mean([np.any([(ant, ap) in amx for ap in antpols])
                                                   for amx in xants_per_file])
                                     for ant in ants}

    # metrics averaged over files, ignoring files where the antenna is absent (treated as NaN)
    average_dead_metrics = {ap: {ant: np.nanmean([dm.get((ant, ap), np.nan) for dm in dead_metrics_per_file])
                                 for ant in ants}
                            for ap in antpols}
    average_crossed_metrics = {ant: np.nanmean([cm.get((ant, ap), np.nan) for ap in antpols
                                                for cm in crossed_metrics_per_file])
                               for ant in ants}
else:
    # fallback thresholds used only for table styling when ant_metrics is unavailable
    dead_cut = 0.4
    crossed_cut = 0.0
if use_redcal:
    # median over the whole night of the per-file medians, per (ant, antpol)
    cspa = {}
    for ant in antpol_pairs:
        cspa[ant] = np.nanmedian(np.hstack([cspa_med_dict[cal][ant] for cal in redcal_files]))

    # fraction of files where the antenna was fully flagged but not thrown out by redcal itself
    redcal_prior_flag_frac = {}
    for ant in ants:
        flagged_going_in = [np.any([afd[ant, ap] and ant not in flagged_by_redcal_dict[cal] for ap in antpols])
                            for cal, afd in post_redcal_ant_flags_dict.items()]
        redcal_prior_flag_frac[ant] = np.mean(flagged_going_in)

    # fraction of files where redcal's history records throwing the antenna out
    redcal_flagged_frac = {}
    for ant in ants:
        redcal_flagged_frac[ant] = np.mean([ant in fbr for fbr in flagged_by_redcal_dict.values()])
# locate the sum and diff autocorrelation files for this night
HHautos = sorted(glob.glob(f"{data_path}/zen.{JD}.*.sum.autos.uvh5"))
diffautos = sorted(glob.glob(f"{data_path}/zen.{JD}.*.diff.autos.uvh5"))

try:
    db = mc.connect_to_mc_db(None)
    session = db.sessionmaker()

    # the JD of each file is the part of its name between 'zen.' and '.sum'
    startJD = float(HHautos[0].split('zen.')[1].split('.sum')[0])
    stopJD = float(HHautos[-1].split('zen.')[1].split('.sum')[0])
    startTime = Time(startJD,format='jd')
    stopTime = Time(stopJD,format='jd')

    res = session.get_antenna_status(starttime=startTime, stoptime=stopTime)
    fem_switches = {}
    if len(res) > 0:
        for antpol in res:
            fem_switches[(antpol.antenna_number, antpol.antenna_feed_pol)] = antpol.fem_switch
        # the commanded state is the most common FEM switch setting
        switch_settings = list(fem_switches.values())
        femState = max(set(switch_settings), key=switch_settings.count)
    else:
        femState = None
except Exception as e:
    # best effort: without the M&C database the commanded state is simply unknown
    print(e)
    femState = None
# Check three files: the second, the middle one, and the second-to-last
read_inds = [1, len(HHautos)//2, -2]
x_status = [1,1,1,1,1,1,1,1]
s = UVData()
s.read(HHautos[1])

nants = len(s.get_ants())
freqs = s.freq_array[0]*1e-6
nfreqs = len(freqs)

# None = not yet checked, True = consistent with commanded state so far, False = failed a check
antCon = {a: None for a in ants}
rightAnts = []

for i in read_inds:
    s = UVData()
    d = UVData()
    s.read(HHautos[i])
    d.read(diffautos[i])

    for pol in [0,1]:
        # NOTE: these arrays are deliberately NOT called `df`; that name is reserved for the
        # summary DataFrames built later in the notebook, and rebinding it here to an ndarray
        # makes later cells fail confusingly if the table cell errors out.
        sum_amp = np.abs(s.data_array[:,0,:,pol])
        diff_amp = np.abs(d.data_array[:,0,:,pol])

        # pad with NaN rows up to a multiple of nants, then nan-average over groups of nants rows
        sum_amp = np.r_[sum_amp, np.nan + np.zeros((-len(sum_amp) % nants,len(freqs)))]
        sum_amp = np.nanmean(sum_amp.reshape(-1,nants,nfreqs),axis=1)
        diff_amp = np.r_[diff_amp, np.nan + np.zeros((-len(diff_amp) % nants,len(freqs)))]
        diff_amp = np.nanmean(diff_amp.reshape(-1,nants,nfreqs),axis=1)

        evens = (sum_amp + diff_amp)/2
        odds = (sum_amp - diff_amp)/2
        rat = np.divide(evens,odds)
        rat = np.nan_to_num(rat)

        # mark a 192-channel block bad if its mean even/odd ratio leaves the range [0.5, 1.5]
        for xbox in range(0,8):
            xavg = np.nanmean(rat[:,xbox*192:(xbox+1)*192],axis=1)
            if np.nanmax(xavg)>1.5 or np.nanmin(xavg)<0.5:
                x_status[xbox] = 0

    # NOTE(review): nesting reconstructed from a copy of the notebook that lost its indentation;
    # this loop is assumed to run once per file read above -- confirm against the original.
    for ant in ants:
        for pol in ["xx", "yy"]:
            # once an antenna has failed any check it stays failed
            if antCon[ant] is False:
                continue
            spectrum = s.get_data(ant, ant, pol)
            stdev = np.std(spectrum)
            med = np.median(np.abs(spectrum))
            if (femState == "load" or femState == 'noise') and 80000 < stdev <= 4000000 and antCon[ant] is not False:
                antCon[ant] = True
            elif femState == "antenna" and stdev > 500000 and med > 950000 and antCon[ant] is not False:
                antCon[ant] = True
            else:
                antCon[ant] = False
            if np.min(np.abs(spectrum)) < 100000:
                antCon[ant] = False

# antennas that passed every check in every file examined
for ant in ants:
    if antCon[ant] is True:
        rightAnts.append(ant)

# one cross mark or check mark per entry of x_status
x_status_str = ''
for i,x in enumerate(x_status):
    if x==0:
        x_status_str += '\u274C '
    else:
        x_status_str += '\u2705 '
DataFrame
def comma_sep_paragraph(vals, chars_per_line=40):
    """Join `vals` into a comma-separated string with HTML line breaks.

    Values are appended to the current line with ', ' until that line is longer
    than `chars_per_line` characters; the next value then starts a new line.
    Lines are joined with ',<br>'. An empty `vals` gives an empty string.
    """
    lines = []
    for item in map(str, vals):
        if lines and len(lines[-1]) <= chars_per_line:
            lines[-1] = f'{lines[-1]}, {item}'
        else:
            lines.append(item)
    return ',<br>'.join(lines)
def format_fraction(count, total):
    """Format `count / total (percent)`; report N/A instead of dividing by zero when total is 0."""
    if total == 0:
        return f'{count} / {total} (N/A)'
    return f'{count} / {total} ({count / total:.1%})'


# Time data
to_show = {'JD': [JD]}
to_show['Date'] = f'{utc.month}-{utc.day}-{utc.year}'
to_show['LST Range'] = f'{hd.lsts[0] * 12 / np.pi:.3f} -- {hd_last.lsts[-1] * 12 / np.pi:.3f} hours'

# X-engine status
to_show['X-Engine Status'] = x_status_str

# Files
to_show['Number of Files'] = len(data_files)

# Antenna Calculations
to_show['Total Number of Antennas'] = len(ants)

to_show[' '] = ''
to_show['OPERATIONAL STATUS SUMMARY'] = ''

# count antennas per a priori status, starting from every status that has a color assigned
status_count = {status: 0 for status in status_colors}
for ant, status in a_priori_statuses.items():
    status_count[status] = status_count.get(status, 0) + 1
to_show['Antenna A Priori Status Count'] = '<br>'.join([f'{status}: {status_count[status]}' for status in status_colors if status in status_count and status_count[status] > 0])

to_show['Commanded Signal Source'] = femState
to_show['Antennas in Commanded State'] = format_fraction(len(rightAnts), len(ants))

if use_ant_metrics:
    # antennas that are always either dead or crossed, and crossed more than half the time
    to_show['Cross-Polarized Antennas'] = ', '.join([str(ant) for ant in ants if (np.max([dead_ant_frac[ap][ant] for ap in antpols]) + crossed_ant_frac[ant] == 1)
                                                                                 and (crossed_ant_frac[ant] > .5)])

# Node calculations
nodes_used = set([nodes[ant] for ant in ants if np.isfinite(nodes[ant])])
to_show['Total Number of Nodes'] = len(nodes_used)
if use_ant_metrics:
    # start by assuming every node is bad and clear the flag as soon as one antpol shows otherwise
    node_off = {node: True for node in nodes_used}
    not_correlating = {node: True for node in nodes_used}
    for ant in ants:
        for ap in antpols:
            if np.isfinite(nodes[ant]):
                if np.isfinite(average_dead_metrics[ap][ant]):
                    node_off[nodes[ant]] = False
                if dead_ant_frac[ap][ant] < 1:
                    not_correlating[nodes[ant]] = False
    to_show['Nodes Registering 0s'] = ', '.join([f'N{n:02}' for n in sorted([node for node in node_off if node_off[node]])])
    to_show['Nodes Not Correlating'] = ', '.join([f'N{n:02}' for n in sorted([node for node in not_correlating if not_correlating[node] and not node_off[node]])])

# Pipeline calculations
# NOTE(review): this key looks identical to the spacer key set above, in which case this
# assignment does not add a second spacer row -- confirm against the original notebook.
to_show[' '] = ''
to_show['NIGHTLY ANALYSIS SUMMARY'] = ''

all_flagged_ants = []
if use_ant_metrics:
    to_show['Ant Metrics Done?'] = '\u2705'
    ant_metrics_flagged_ants = [ant for ant in ants if ant_metrics_xants_frac_by_ant[ant] > 0]
    all_flagged_ants.extend(ant_metrics_flagged_ants)
    to_show['Ant Metrics Flagged Antennas'] = format_fraction(len(ant_metrics_flagged_ants), len(ants))
else:
    to_show['Ant Metrics Done?'] = '\u274C'
if use_auto_metrics:
    to_show['Auto Metrics Done?'] = '\u2705'
    auto_metrics_flagged_ants = [ant for ant in ants if ant in auto_ex_ants]
    all_flagged_ants.extend(auto_metrics_flagged_ants)
    to_show['Auto Metrics Flagged Antennas'] = format_fraction(len(auto_metrics_flagged_ants), len(ants))
else:
    to_show['Auto Metrics Done?'] = '\u274C'
if use_redcal:
    to_show['Redcal Done?'] = '\u2705'
    redcal_flagged_ants = [ant for ant in ants if redcal_flagged_frac[ant] > 0]
    all_flagged_ants.extend(redcal_flagged_ants)
    to_show['Redcal Flagged Antennas'] = format_fraction(len(redcal_flagged_ants), len(ants))
else:
    to_show['Redcal Done?'] = '\u274C'
flagged_ant_set = set(all_flagged_ants)
to_show['Never Flagged Antennas'] = format_fraction(len(ants) - len(flagged_ant_set), len(ants))

# Count bad antennas with good statuses and vice versa
good_status_list = good_statuses.split(',')
n_apriori_good = len([ant for ant in ants if a_priori_statuses[ant] in good_status_list])
apriori_good_flagged = []
aprior_bad_unflagged = []
for ant in ants:
    if ant in flagged_ant_set and a_priori_statuses[ant] in good_status_list:
        apriori_good_flagged.append(ant)
    elif ant not in flagged_ant_set and a_priori_statuses[ant] not in good_status_list:
        aprior_bad_unflagged.append(ant)
to_show['A Priori Good Antennas Flagged'] = f'{len(apriori_good_flagged)} / {n_apriori_good} total a priori good antennas:<br>' + \
                                            comma_sep_paragraph(apriori_good_flagged)
to_show['A Priori Bad Antennas Not Flagged'] = f'{len(aprior_bad_unflagged)} / {len(ants) - n_apriori_good} total a priori bad antennas:<br>' + \
                                            comma_sep_paragraph(aprior_bad_unflagged)

# Apply Styling
df = pd.DataFrame(to_show)
divider_cols = [df.columns.get_loc(col) for col in ['NIGHTLY ANALYSIS SUMMARY', 'OPERATIONAL STATUS SUMMARY']]
try:
    to_red_columns = [df.columns.get_loc(col) for col in ['Cross-Polarized Antennas', 'Nodes Registering 0s',
                                                          'Nodes Not Correlating', 'A Priori Good Antennas Flagged']]
except KeyError:
    # some of these entries only exist when ant_metrics is available; skip red highlighting then
    to_red_columns = []


def red_specific_cells(x):
    """Return a frame of CSS strings that colors the rows listed in to_red_columns red."""
    df1 = pd.DataFrame('', index=x.index, columns=x.columns)
    for col in to_red_columns:
        # positions were looked up as columns before the transpose below, so they index rows here
        df1.iloc[col] = 'color: red'
    return df1


df = df.T
table = df.style.hide_columns().apply(red_specific_cells, axis=None)
# render the two section-header rows as white text on black
for col in divider_cols:
    table = table.set_table_styles([{"selector":f"tr:nth-child({col+1})", "props": [("background-color", "black"), ("color", "white")]}], overwrite=False)
--------------------------------------------------------------------------- ZeroDivisionError Traceback (most recent call last) Input In [17], in <cell line: 27>() 24 to_show['Antenna A Priori Status Count'] = '<br>'.join([f'{status}: {status_count[status]}' for status in status_colors if status in status_count and status_count[status] > 0]) 26 to_show['Commanded Signal Source'] = femState ---> 27 to_show['Antennas in Commanded State'] = f'{len(rightAnts)} / {len(ants)} ({len(rightAnts) / len(ants):.1%})' 29 if use_ant_metrics: 30 to_show['Cross-Polarized Antennas'] = ', '.join([str(ant) for ant in ants if (np.max([dead_ant_frac[ap][ant] for ap in antpols]) + crossed_ant_frac[ant] == 1) 31 and (crossed_ant_frac[ant] > .5)]) ZeroDivisionError: division by zero
# Display the styled array-health summary table built in the previous cell
HTML(table.render())
--------------------------------------------------------------------------- NameError Traceback (most recent call last) Input In [18], in <cell line: 1>() ----> 1 HTML(table.render()) NameError: name 'table' is not defined
# write to csv
outpath = os.path.join(nb_outdir, f'array_health_table_{JD}.csv')
print(f'Now saving Table 1 to a csv at {outpath}')
# swap the check/cross marks for Y/N and the HTML line breaks for spaces so the csv is plain text
df.replace({'\u2705': 'Y'}, regex=True) \
  .replace({'\u274C': 'N'}, regex=True) \
  .replace({'<br>': ' '}, regex=True) \
  .to_csv(outpath)
Now saving Table 2 to a csv at /home/obs/src/H5C_Notebooks/_rtp_summary_/array_health_table_2459811.csv
--------------------------------------------------------------------------- AttributeError Traceback (most recent call last) Input In [19], in <cell line: 4>() 2 outpath = os.path.join(nb_outdir, f'array_health_table_{JD}.csv') 3 print(f'Now saving Table 2 to a csv at {outpath}') ----> 4 df.replace({'\u2705': 'Y'}, regex=True).replace({'\u274C': 'N'}, regex=True).replace({'<br>': ' '}, regex=True).to_csv(outpath) AttributeError: 'numpy.ndarray' object has no attribute 'replace'
DataFrame
# build dataframe
to_show = {'Ant': [f'<a href="{ant_to_report_url(ant)}" target="_blank">{ant}</a>' for ant in ants],
           'Node': [f'N{nodes[ant]:02}' for ant in ants],
           'A Priori Status': [a_priori_statuses[ant] for ant in ants]}
           #'Worst Metric': worst_metrics, 'Worst Modified Z-Score': worst_zs}
df = pd.DataFrame(to_show)

# create bar chart columns for flagging percentages:
bar_cols = {}
if use_auto_metrics:
    bar_cols['Auto Metrics Flags'] = [float(ant in auto_ex_ants) for ant in ants]
if use_ant_metrics:
    if np.sum(list(a_priori_flag_frac.values())) > 0:  # only include this col if there are any a priori flags
        bar_cols['A Priori Flag Fraction in Ant Metrics'] = [a_priori_flag_frac[ant] for ant in ants]
    for ap in antpols:
        bar_cols[f'Dead Fraction in Ant Metrics ({ap})'] = [dead_ant_frac[ap][ant] for ant in ants]
    bar_cols['Crossed Fraction in Ant Metrics'] = [crossed_ant_frac[ant] for ant in ants]
if use_redcal:
    bar_cols['Flag Fraction Before Redcal'] = [redcal_prior_flag_frac[ant] for ant in ants]
    bar_cols['Flagged By Redcal chi^2 Fraction'] = [redcal_flagged_frac[ant] for ant in ants]
for col in bar_cols:
    df[col] = bar_cols[col]

# add auto_metrics
if use_auto_metrics:
    for label, modz in all_modzs.items():
        df[label] = modz
z_score_cols = [col for col in df.columns if 'Modified Z-Score' in col]

# add ant_metrics
ant_metrics_cols = {}
if use_ant_metrics:
    for ap in antpols:
        ant_metrics_cols[f'Average Dead Ant Metric ({ap})'] = [average_dead_metrics[ap][ant] for ant in ants]
    ant_metrics_cols['Average Crossed Ant Metric'] = [average_crossed_metrics[ant] for ant in ants]
    for col in ant_metrics_cols:
        df[col] = ant_metrics_cols[col]

# add redcal chisq
redcal_cols = []
if use_redcal:
    for ap in antpols:
        col_title = f'Median chi^2 Per Antenna ({ap})'
        df[col_title] = [cspa[ant, ap] for ant in ants]
        redcal_cols.append(col_title)

# sort by node number and then by antenna number within nodes.
# sort_values returns a new frame, so the result must be assigned. Rows are built in `ants`
# order, so a stable sort on 'Node' alone keeps that antenna order within each node (sorting
# on 'Ant' would compare the HTML link strings instead).
df = df.sort_values('Node', ascending=True, kind='mergesort').reset_index(drop=True)

# style dataframe
dead_metric_cols = [col for col in ant_metrics_cols if 'dead' in col.lower()]
crossed_metric_cols = [col for col in ant_metrics_cols if 'crossed' in col.lower()]
bar_col_names = list(bar_cols.keys())

table = df.style.hide_index() \
          .applymap(lambda val: f'background-color: {status_colors[val]}' if val in status_colors else '', subset=['A Priori Status'])

# Each group of styles is applied only when its columns exist: handing an empty subset to the
# column-wise gradients makes rendering raise ValueError whenever a pipeline step has not run.
if z_score_cols:
    table = table.background_gradient(cmap='viridis', vmax=mean_round_modz_cut * 3, vmin=0, axis=None, subset=z_score_cols)
if dead_metric_cols:
    table = table.background_gradient(cmap='bwr_r', vmin=dead_cut-.25, vmax=dead_cut+.25, axis=0, subset=dead_metric_cols)
if crossed_metric_cols:
    table = table.background_gradient(cmap='bwr_r', vmin=crossed_cut-.25, vmax=crossed_cut+.25, axis=0, subset=crossed_metric_cols)
if redcal_cols:
    table = table.background_gradient(cmap='plasma', vmax=4, vmin=1, axis=None, subset=redcal_cols)
if dead_metric_cols:
    table = table.applymap(lambda val: 'font-weight: bold' if val < dead_cut else '', subset=dead_metric_cols)
if crossed_metric_cols:
    table = table.applymap(lambda val: 'font-weight: bold' if val < crossed_cut else '', subset=crossed_metric_cols)
if z_score_cols:
    table = table.applymap(lambda val: 'font-weight: bold' if val > mean_round_modz_cut else '', subset=z_score_cols) \
                 .applymap(lambda val: 'color: red' if val > mean_round_modz_cut else '', subset=z_score_cols)
if bar_col_names:
    table = table.bar(subset=bar_col_names, vmin=0, vmax=1)

# A single format call: with a dict formatter, columns missing from the dict fall back to the
# default formatter, so separate calls per column group would undo one another.
formatters = {col: '{:,.4f}'.format for col in z_score_cols}
formatters.update({col: '{:,.4f}'.format for col in ant_metrics_cols})
formatters.update({col: '{:,.2%}'.format for col in bar_cols})

table = table.format(formatters) \
             .applymap(lambda val: 'font-weight: bold', subset=['Ant']) \
             .set_table_styles([dict(selector="th",props=[('max-width', f'70pt')])])
This admittedly very busy table incorporates summary information about all antennas in the array. Its columns depend on what information is available when the notebook is run (i.e. whether auto_metrics
, ant_metrics
, and/or redcal
is done). These can be divided into 5 sections:
Basic Antenna Info: antenna number, node, and its a priori status.
Flag Fractions: Fraction of the night that an antenna was flagged for various reasons. Note that auto_metrics
flags antennas for the whole night, so it'll be 0% or 100%.
auto_metrics
Details: If auto_metrics
is included, this section shows the modified Z-score signifying how much of an outlier each antenna and polarization is in each of four categories: bandpass shape, overall power, temporal variability, and temporal discontinuities. Bold red text indicates that this is a reason for flagging the antenna. It is reproduced from the auto_metrics_inspect.ipynb
nightly notebook, so check that out for more details on the precise metrics.
ant_metrics
Details: If ant_metrics
is included, this section shows the average correlation-based metrics for antennas over the whole night. Low "dead ant" metrics (nominally below 0.4) indicate antennas not correlating with the rest of the array. Negative "crossed ant" metrics indicate antennas that show stronger correlations in their cross-pols than their same-pols, indicating that the two polarizations are probably swapped. Bold text indicates that the average is below the threshold for flagging.
redcal
chi^2 Details: If redcal
is included, this shows the median chi^2 per antenna. This would be 1 in an ideal array. Antennas are thrown out when they are outliers in their median chi^2, usually greater than 4-sigma outliers in modified Z-score.
# Display the styled per-antenna summary table built above
HTML(table.render())
--------------------------------------------------------------------------- ValueError Traceback (most recent call last) Input In [21], in <cell line: 1>() ----> 1 HTML(table.render()) File ~/anaconda/envs/RTP/lib/python3.9/site-packages/pandas/io/formats/style.py:344, in Styler.render(self, sparse_index, sparse_columns, **kwargs) 342 if sparse_columns is None: 343 sparse_columns = get_option("styler.sparse.columns") --> 344 return self._render_html(sparse_index, sparse_columns, **kwargs) File ~/anaconda/envs/RTP/lib/python3.9/site-packages/pandas/io/formats/style_render.py:162, in StylerRenderer._render_html(self, sparse_index, sparse_columns, max_rows, max_cols, **kwargs) 150 def _render_html( 151 self, 152 sparse_index: bool, (...) 156 **kwargs, 157 ) -> str: 158 """ 159 Renders the ``Styler`` including all applied styles to HTML. 160 Generates a dict with necessary kwargs passed to jinja2 template. 161 """ --> 162 self._compute() 163 # TODO: namespace all the pandas keys 164 d = self._translate(sparse_index, sparse_columns, max_rows, max_cols) File ~/anaconda/envs/RTP/lib/python3.9/site-packages/pandas/io/formats/style_render.py:205, in StylerRenderer._compute(self) 203 r = self 204 for func, args, kwargs in self._todo: --> 205 r = func(self)(*args, **kwargs) 206 return r File ~/anaconda/envs/RTP/lib/python3.9/site-packages/pandas/io/formats/style.py:1447, in Styler._apply(self, func, axis, subset, **kwargs) 1444 result = data.T.apply(func, axis=0, **kwargs).T # see GH 42005 1446 if isinstance(result, Series): -> 1447 raise ValueError( 1448 f"Function {repr(func)} resulted in the apply method collapsing to a " 1449 f"Series.\nUsually, this is the result of the function returning a " 1450 f"single value, instead of list-like." 1451 ) 1452 msg = ( 1453 f"Function {repr(func)} created invalid {{0}} labels.\nUsually, this is " 1454 f"the result of the function returning a " (...) 
1460 f"Expected {{0}} shape: {{2}}" 1461 ) 1462 if not all(result.index.isin(data.index)): ValueError: Function <function _background_gradient at 0x7f59cb3098b0> resulted in the apply method collapsing to a Series. Usually, this is the result of the function returning a single value, instead of list-like.
# print ex_ants for easy copy-pasting to YAML file
# an antenna is proposed for exclusion if any of its flag-fraction columns is nonzero
proposed_ex_ants = [ant for i, ant in enumerate(ants) if np.any([col[i] > 0 for col in bar_cols.values()])]
ex_ants_text = ", ".join(str(ant) for ant in proposed_ex_ants)
print(f'ex_ants: [{ex_ants_text}]')

unflagged_text = ", ".join([str(ant) for ant in ants if ant not in proposed_ex_ants])
print(f'\nunflagged_ants: [{unflagged_text}]')

# "golden" means no flags and good a priori status
golden_ants = ", ".join([str(ant) for ant in ants
                         if ant not in proposed_ex_ants and a_priori_statuses[ant] in good_statuses.split(',')])
print(f'\ngolden_ants: [{golden_ants}]')
ex_ants: [] unflagged_ants: [] golden_ants: []
# write to csv
csv_name = f'rtp_summary_table_{JD}.csv'
outpath = os.path.join(nb_outdir, csv_name)
print(f'Now saving Table 2 to a csv at {outpath}')
df.to_csv(outpath)
Now saving Table 2 to a csv at /home/obs/src/H5C_Notebooks/_rtp_summary_/rtp_summary_table_2459811.csv
# Load antenna positions
data_list = sorted(glob.glob(os.path.join(data_path, f'zen.{JD}.?????.sum.uvh5')))
hd = io.HERAData(data_list[len(data_list) // 2])

# Figure out where to draw the nodes
node_centers = {}
# antennas with no known node carry NaN; drop those before sorting
for node in sorted({node for node in nodes.values() if np.isfinite(node)}):
    this_node_ants = [ant for ant in ants + unused_ants if nodes[ant] == node]
    if len(this_node_ants) == 1:
        # put the node label just to the west of the lone antenna
        # (index by that antenna explicitly rather than by a loop variable left over from another cell)
        node_centers[node] = hd.antpos[this_node_ants[0]] + np.array([-14.6 / 2, 0, 0])
    else:
        # put the node label between the two antennas closest to the node center
        node_centers[node] = np.mean([hd.antpos[ant] for ant in this_node_ants], axis=0)
        closest_two_pos = sorted([hd.antpos[ant] for ant in this_node_ants],
                                 key=lambda pos: np.linalg.norm(pos - node_centers[node]))[0:2]
        node_centers[node] = np.mean(closest_two_pos, axis=0)
def Plot_Array(ants, unused_ants, outriggers):
    """Draw an array-layout figure summarizing per-antenna flags and a priori statuses.

    Parameters
    ----------
    ants : list
        Antenna numbers drawn as full markers, with flagging annuli and status-colored labels.
    unused_ants : list
        Antenna numbers drawn as faint grey markers.
    outriggers : bool
        Selects the "Outrigger" (True) or "Core" (False) wording in the title and legend.
        When True the marker radii for `ants` are doubled; when False the axis limits are
        initially set to x in [-200, 150] and y in [-150, 150].

    Notes
    -----
    Reads these notebook-level names: hd, nodes, node_centers, a_priori_statuses, status_colors,
    JD, use_redcal / redcal_flagged_frac, use_ant_metrics / ant_metrics_xants_frac_by_ant and
    use_auto_metrics / auto_ex_ants. Draws onto a new 16x16 figure and returns None.
    """
    plt.figure(figsize=(16, 16))
    ax = plt.gca()

    # "Core" or "Outrigger", used in the title and legend text
    array_label = ["Core", "Outrigger"][outriggers]
    # marker radii are doubled on the outrigger plot
    radius_scale = 2 - 1 * float(not outriggers)

    # zero-size scatter of the antennas in this plot
    plt.scatter(np.array([hd.antpos[ant][0] for ant in hd.data_ants if ant in ants]),
                np.array([hd.antpos[ant][1] for ant in hd.data_ants if ant in ants]), c='w', s=0)

    # connect every antenna to their node
    for ant in ants:
        if nodes[ant] in node_centers:
            plt.plot([hd.antpos[ant][0], node_centers[nodes[ant]][0]],
                     [hd.antpos[ant][1], node_centers[nodes[ant]][1]], 'k', zorder=0)

    rc_color = '#0000ff'     # blue: redcal
    antm_color = '#ffa500'   # orange: ant_metrics
    autom_color = '#ff1493'  # pink: auto_metrics

    # Plot
    unflagged_ants = []
    for ant in ants:
        ant_has_flag = False
        center = tuple(hd.antpos[ant][0:2])

        # plot large blue annuli for redcal flags (a filled disk with a white disk drawn on top)
        if use_redcal:
            if redcal_flagged_frac[ant] > 0:
                ant_has_flag = True
                ax.add_artist(plt.Circle(center, radius=7 * radius_scale, fill=True, lw=0,
                                         color=rc_color, alpha=redcal_flagged_frac[ant]))
                ax.add_artist(plt.Circle(center, radius=6 * radius_scale, fill=True, color='w'))

        # plot medium orange annuli for ant_metrics flags
        if use_ant_metrics:
            if ant_metrics_xants_frac_by_ant[ant] > 0:
                ant_has_flag = True
                ax.add_artist(plt.Circle(center, radius=6 * radius_scale, fill=True, lw=0,
                                         color=antm_color, alpha=ant_metrics_xants_frac_by_ant[ant]))
                ax.add_artist(plt.Circle(center, radius=5 * radius_scale, fill=True, color='w'))

        # plot small pink annuli for auto_metrics
        if use_auto_metrics:
            if ant in auto_ex_ants:
                ant_has_flag = True
                ax.add_artist(plt.Circle(center, radius=5 * radius_scale, fill=True, lw=0, color=autom_color))

        # plot black/white circles with black outlines for antennas (black means flagged)
        ax.add_artist(plt.Circle(center, radius=4 * radius_scale, fill=True,
                                 color=['w', 'k'][ant_has_flag], ec='k'))
        if not ant_has_flag:
            unflagged_ants.append(ant)

        # label antennas, using apriori statuses if available
        try:
            bgc = matplotlib.colors.to_rgb(status_colors[a_priori_statuses[ant]])
            # choose black or white text depending on the brightness of the background color
            c = 'black' if (bgc[0]*0.299 + bgc[1]*0.587 + bgc[2]*0.114) > 186 / 256 else 'white'
        except (KeyError, ValueError):
            # no status for this antenna, or no usable color for its status:
            # fall back to black text on white
            c = 'k'
            bgc = 'white'
        plt.text(hd.antpos[ant][0], hd.antpos[ant][1], str(ant), va='center', ha='center',
                 color=c, backgroundcolor=bgc)

    # label nodes (only those with a computed, non-NaN center)
    for node in sorted(set(nodes.values())):
        if node in node_centers and not np.all(np.isnan(node_centers[node])):
            plt.text(node_centers[node][0], node_centers[node][1], str(node),
                     va='center', ha='center', bbox={'color': 'w', 'ec': 'k'})

    # build legend
    legend_objs = []
    legend_labels = []

    # use circles for annuli
    legend_objs.append(matplotlib.lines.Line2D([0], [0], marker='o', color='w', markeredgecolor='k',
                                               markerfacecolor='w', markersize=13))
    legend_labels.append(f'{len(unflagged_ants)} / {len(ants)} Total {array_label} Antennas Never Flagged')
    legend_objs.append(matplotlib.lines.Line2D([0], [0], marker='o', color='w', markerfacecolor='k', markersize=15))
    legend_labels.append(f'{len(ants) - len(unflagged_ants)} {array_label} Antennas Flagged for Any Reason')

    if use_auto_metrics:
        legend_objs.append(matplotlib.lines.Line2D([0], [0], marker='o', color='w', markeredgewidth=2,
                                                   markeredgecolor=autom_color, markersize=15))
        legend_labels.append(f'{len([ant for ant in auto_ex_ants if ant in ants])} {array_label} '
                             'Antennas Flagged by Auto Metrics')
    if use_ant_metrics:
        antm_fracs = [frac for ant, frac in ant_metrics_xants_frac_by_ant.items() if ant in ants]
        legend_objs.append(matplotlib.lines.Line2D([0], [0], marker='o', color='w', markeredgewidth=2,
                                                   markeredgecolor=antm_color, markersize=15))
        legend_labels.append(f'{np.round(np.sum(antm_fracs), 2)} Antenna-Nights on'
                             f'\n{np.sum([frac > 0 for frac in antm_fracs])} {array_label} Antennas '
                             'Flagged by Ant Metrics\n(alpha indicates fraction of time)')
    if use_redcal:
        # restrict to the antennas in this plot so both numbers in the label refer to the same set
        rc_fracs = [frac for ant, frac in redcal_flagged_frac.items() if ant in ants]
        legend_objs.append(matplotlib.lines.Line2D([0], [0], marker='o', color='w', markeredgewidth=2,
                                                   markeredgecolor=rc_color, markersize=15))
        legend_labels.append(f'{np.round(np.sum(rc_fracs), 2)} Antenna-Nights on'
                             f'\n{np.sum([frac > 0 for frac in rc_fracs])} {array_label} Antennas '
                             'Flagged by Redcal\n(alpha indicates fraction of time)')

    # use filled circles for the a priori statuses, with counts for the antennas in this plot
    statuses_in_plot = [status for ant, status in a_priori_statuses.items() if ant in ants]
    for aps in sorted(set(a_priori_statuses.values())):
        if aps != 'Not Found':
            legend_objs.append(plt.Circle((0, 0), radius=7, fill=True, color=status_colors[aps]))
            legend_labels.append(f'A Priori Status:\n{aps} ({statuses_in_plot.count(aps)} {array_label} Antennas)')

    # label nodes as a white box with black outline
    if len(node_centers) > 0:
        legend_objs.append(matplotlib.patches.Patch(facecolor='w', edgecolor='k'))
        legend_labels.append('Node Number')

    if len(unused_ants) > 0:
        legend_objs.append(matplotlib.lines.Line2D([0], [0], marker='o', color='w', markerfacecolor='grey',
                                                   markersize=15, alpha=.2))
        legend_labels.append('Antenna Not In Data')

    plt.legend(legend_objs, legend_labels, ncol=2, fontsize='large', framealpha=1)

    # only the core plot gets fixed initial axis limits
    if not outriggers:
        plt.xlim([-200, 150])
        plt.ylim([-150, 150])

    # set axis equal and label everything
    plt.axis('equal')
    plt.tight_layout()
    plt.title(f'Summary of {array_label} Antenna Statuses and Metrics on {JD}', size=20)
    plt.xlabel("Antenna East-West Position (meters)", size=12)
    plt.ylabel("Antenna North-South Position (meters)", size=12)
    plt.xticks(fontsize=12)
    plt.yticks(fontsize=12)
    xlim = ax.get_xlim()
    ylim = ax.get_ylim()

    # plot unused antennas, with autoscaling turned off first
    plt.autoscale(False)
    for ant in unused_ants:
        if nodes[ant] in node_centers:
            plt.plot([hd.antpos[ant][0], node_centers[nodes[ant]][0]],
                     [hd.antpos[ant][1], node_centers[nodes[ant]][1]], 'k', alpha=.2, zorder=0)
        # opaque white disk first, then the translucent grey marker on top of it
        ax.add_artist(plt.Circle(tuple(hd.antpos[ant][0:2]), radius=4, fill=True, color='w', ec=None, alpha=1, zorder=0))
        ax.add_artist(plt.Circle(tuple(hd.antpos[ant][0:2]), radius=4, fill=True, color='grey', ec=None, alpha=.2, zorder=0))
        # only label unused antennas that fall inside the axis limits read above
        if hd.antpos[ant][0] < xlim[1] and hd.antpos[ant][0] > xlim[0]:
            if hd.antpos[ant][1] < ylim[1] and hd.antpos[ant][1] > ylim[0]:
                plt.text(hd.antpos[ant][0], hd.antpos[ant][1], str(ant), va='center', ha='center', color='k', alpha=.2)
This plot shows all antennas, which nodes they are connected to, and their a priori statuses (as the highlight text of their antenna numbers). It may also show (depending on what is finished running):
- Whether they were flagged by `auto_metrics` (pink circle) for bandpass shape, overall power, temporal variability, or temporal discontinuities. This is done in a binary fashion for the whole night.
- Whether they were flagged by `ant_metrics` (orange circle) as either dead (on either polarization) or crossed, with the transparency indicating the fraction of the night (i.e. number of files) that were flagged.
- Whether they were flagged by `redcal` (blue circle) for high chi^2, with the transparency indicating the fraction of the night (i.e. number of files) that were flagged. Note that the last fraction does not include antennas that were flagged before going into redcal due to their a priori status, for example.
# Split the antennas at antenna number 320: below is "core", 320 and above is "outrigger"
core_ants = [antnum for antnum in ants if antnum < 320]
outrigger_ants = [antnum for antnum in ants if antnum >= 320]

# The core plot is always made
Plot_Array(core_ants, unused_ants, False)

# The outrigger plot is only made if there are outriggers; the core antennas are passed
# along with the unused ones so that they are drawn as faint background markers
if outrigger_ants:
    Plot_Array(outrigger_ants, sorted(set(unused_ants + core_ants)), True)
# Print the versions of hera_qm and hera_cal used for this run.
# Both imports bind the same name (__version__), so each print must directly follow its import.
from hera_qm import __version__
print(__version__)
# rebinds __version__ to the hera_cal version
from hera_cal import __version__
print(__version__)
2.0.3.dev44+g7d4aa18 3.1.4.dev3+g68bd8c3