Full-Day Autocorrelation Checking¶
by Josh Dillon and Steven Murray, last updated May 28, 2024
This notebook is designed to assess per-day data quality from just autocorrelations, enabling a quick assessment of whether the day is worth pushing through further analysis. In particular, it is designed to look for times that are particularly contaminated by broadband RFI (e.g. from lightning), picking out fraction of days worth analyzing. It's output is a an a priori flag yaml file readable by hera_qm.metrics_io
functions read_a_priori_chan_flags()
, read_a_priori_int_flags()
, and read_a_priori_ant_flags()
.
Here's a set of links to skip to particular figures:
• Figure 1: Preliminary Array Flag Fraction Summary¶
• Figure 2: z-Score of DPSS-Filtered, Averaged Good Autocorrelation and Initial Flags¶
• Figure 3: Proposed A Priori Time Flags Based on Frequency-Averaged and Convolved z-Score Magnitude¶
import time
tstart = time.time()
import os
os.environ['HDF5_USE_FILE_LOCKING'] = 'FALSE'
import h5py
import hdf5plugin # REQUIRED to have the compression plugins available
import numpy as np
import pandas as pd
import glob
import os
import matplotlib.pyplot as plt
import matplotlib
import copy
from scipy.ndimage import convolve
from hera_cal import io, utils
from hera_cal.smooth_cal import dpss_filters, solve_2D_DPSS
from hera_qm import ant_class, xrfi, metrics_io
from hera_qm.time_series_metrics import true_stretches, impose_max_flag_gap, metric_convolution_flagging
from hera_filters import dspec
import warnings
from IPython.display import display, HTML
%matplotlib inline
display(HTML("<style>.container { width:100% !important; }</style>"))
_ = np.seterr(all='ignore') # get rid of red warnings
%config InlineBackend.figure_format = 'retina'
Parse input and output files¶
# get filenames
AUTO_FILE = os.environ.get("AUTO_FILE", None)
# AUTO_FILE = '/mnt/sn1/data2/2460458/zen.2460458.16881.sum.uvh5'
SUM_AUTOS_SUFFIX = os.environ.get("SUM_AUTOS_SUFFIX", 'sum.autos.uvh5')
DIFF_AUTOS_SUFFIX = os.environ.get("DIFF_AUTOS_SUFFIX", 'diff.autos.uvh5')
OUT_YAML_SUFFIX = os.environ.get("OUT_YAML_SUFFIX", '_apriori_flags.yaml')
OUT_YAML_DIR = os.environ.get("OUT_YAML_DIR", None)
# OUT_YAML_DIR = '/lustre/aoc/projects/hera/jsdillon/H6C/'
if OUT_YAML_DIR is None:
OUT_YAML_DIR = os.path.dirname(AUTO_FILE)
auto_sums_glob = '.'.join(AUTO_FILE.split('.')[:-4]) + '.*.' + SUM_AUTOS_SUFFIX
auto_diffs_glob = auto_sums_glob.replace(SUM_AUTOS_SUFFIX, DIFF_AUTOS_SUFFIX)
out_yaml_file = os.path.join(OUT_YAML_DIR, AUTO_FILE.split('.')[-5] + OUT_YAML_SUFFIX)
auto_sums = sorted(glob.glob(auto_sums_glob))
print(f'Found {len(auto_sums)} *.{SUM_AUTOS_SUFFIX} files starting with {auto_sums[0]}.')
auto_diffs = sorted(glob.glob(auto_diffs_glob))
print(f'Found {len(auto_diffs)} *.{DIFF_AUTOS_SUFFIX} files starting with {auto_diffs[0]}.')
Found 1571 *.sum.autos.uvh5 files starting with /mnt/sn1/data2/2460538/zen.2460538.16922.sum.autos.uvh5. Found 1571 *.diff.autos.uvh5 files starting with /mnt/sn1/data2/2460538/zen.2460538.16922.diff.autos.uvh5.
Parse settings¶
# bounds on zeros in spectra
good_zeros_per_eo_spectrum = (0, int(os.environ.get("MAX_ZEROS_PER_EO_SPEC_GOOD", 2)))
suspect_zeros_per_eo_spectrum = (0, int(os.environ.get("MAX_ZEROS_PER_EO_SPEC_SUSPECT", 8)))
# bounds on autocorrelation power
auto_power_good = (float(os.environ.get("AUTO_POWER_GOOD_LOW", 5)), float(os.environ.get("AUTO_POWER_GOOD_HIGH", 30)))
auto_power_suspect = (float(os.environ.get("AUTO_POWER_SUSPECT_LOW", 1)), float(os.environ.get("AUTO_POWER_SUSPECT_HIGH", 60)))
# bounds on autocorrelation slope
auto_slope_good = (float(os.environ.get("AUTO_SLOPE_GOOD_LOW", -0.4)), float(os.environ.get("AUTO_SLOPE_GOOD_HIGH", 0.4)))
auto_slope_suspect = (float(os.environ.get("AUTO_SLOPE_SUSPECT_LOW", -0.6)), float(os.environ.get("AUTO_SLOPE_SUSPECT_HIGH", 0.6)))
# bounds on autocorrelation RFI
auto_rfi_good = (0, float(os.environ.get("AUTO_RFI_GOOD", 1.5)))
auto_rfi_suspect = (0, float(os.environ.get("AUTO_RFI_SUSPECT", 2)))
# bounds on autocorrelation shape
auto_shape_good = (0, float(os.environ.get("AUTO_SHAPE_GOOD", 0.1)))
auto_shape_suspect = (0, float(os.environ.get("AUTO_SHAPE_SUSPECT", 0.2)))
# parse RFI settings for antenna flagging
RFI_DPSS_HALFWIDTH = float(os.environ.get("RFI_DPSS_HALFWIDTH", 300e-9)) # in s
RFI_NSIG = float(os.environ.get("RFI_NSIG", 6))
# parse settings for identifying mislabeled data by X-engine
BAD_XENGINE_ZCUT = float(os.environ.get("BAD_XENGINE_ZCUT", 10))
# DPSS settings for full-day filtering
FREQ_FILTER_SCALE = float(os.environ.get("FREQ_FILTER_SCALE", 5.0)) # in MHz
TIME_FILTER_SCALE = float(os.environ.get("TIME_FILTER_SCALE", 450.0)) # in s
EIGENVAL_CUTOFF = float(os.environ.get("EIGENVAL_CUTOFF", 1e-12))
# A priori flag settings
FM_LOW_FREQ = float(os.environ.get("FM_LOW_FREQ", 87.5)) # in MHz
FM_HIGH_FREQ = float(os.environ.get("FM_HIGH_FREQ", 108.0)) # in MHz
FM_freq_range = [FM_LOW_FREQ * 1e6, FM_HIGH_FREQ * 1e6]
MAX_SOLAR_ALT = float(os.environ.get("MAX_SOLAR_ALT", 0.0)) # in degrees
SMOOTHED_ABS_Z_THRESH = float(os.environ.get("SMOOTHED_ABS_Z_THRESH", 10))
WHOLE_DAY_FLAG_THRESH = float(os.environ.get("WHOLE_DAY_FLAG_THRESH", 0.5))
for setting in ['good_zeros_per_eo_spectrum', 'suspect_zeros_per_eo_spectrum', 'auto_power_good', 'auto_power_suspect',
'auto_slope_good', 'auto_slope_suspect', 'auto_rfi_good', 'auto_rfi_suspect',
'auto_shape_good', 'auto_shape_suspect', 'BAD_XENGINE_ZCUT', 'RFI_DPSS_HALFWIDTH', 'RFI_NSIG',
'FREQ_FILTER_SCALE', 'TIME_FILTER_SCALE', 'EIGENVAL_CUTOFF', 'FM_freq_range',
'MAX_SOLAR_ALT', 'SMOOTHED_ABS_Z_THRESH', 'WHOLE_DAY_FLAG_THRESH']:
print(f'{setting} = {eval(setting)}')
good_zeros_per_eo_spectrum = (0, 2) suspect_zeros_per_eo_spectrum = (0, 8) auto_power_good = (5.0, 30.0) auto_power_suspect = (1.0, 60.0) auto_slope_good = (-0.4, 0.4) auto_slope_suspect = (-0.6, 0.6) auto_rfi_good = (0, 1.5) auto_rfi_suspect = (0, 2.0) auto_shape_good = (0, 0.1) auto_shape_suspect = (0, 0.2) BAD_XENGINE_ZCUT = 10.0 RFI_DPSS_HALFWIDTH = 3e-07 RFI_NSIG = 4.0 FREQ_FILTER_SCALE = 5.0 TIME_FILTER_SCALE = 450.0 EIGENVAL_CUTOFF = 1e-12 FM_freq_range = [87500000.0, 108000000.0] MAX_SOLAR_ALT = 0.0 SMOOTHED_ABS_Z_THRESH = 10.0 WHOLE_DAY_FLAG_THRESH = 0.5
Classify Antennas and Find RFI Per-File¶
cache = {}
def auto_bl_zscores(data, flag_array, prior_class=None, cache={}):
'''This function computes z-score arrays for each delay-filtered autocorrelation, normalized by the expected noise.
Flagged times/channels for the whole array are given 0 weight in filtering and are np.nan in the z-score.'''
zscores = {}
int_time = 24 * 3600 * np.median(np.diff(data.times))
chan_res = np.median(np.diff(data.freqs))
int_count = int(int_time * chan_res)
for bl in data:
if utils.split_bl(bl)[0] != utils.split_bl(bl)[1]:
continue
if prior_class is not None:
if (prior_class[utils.split_bl(bl)[0]] == 'bad'):
continue
wgts = np.array(np.logical_not(flag_array), dtype=np.float64)
model, _, _ = dspec.fourier_filter(data.freqs, data[bl], wgts, filter_centers=[0], filter_half_widths=[RFI_DPSS_HALFWIDTH], mode='dpss_solve',
suppression_factors=[1e-9], eigenval_cutoff=[1e-9], cache=cache)
res = data[bl] - model
sigma = np.abs(model) / np.sqrt(int_count / 2)
zscores[bl] = res / sigma
zscores[bl][flag_array] = np.nan
return zscores
def rfi_from_avg_autos(data, auto_bls_to_use, prior_flags=None, nsig=RFI_NSIG):
'''Average together all baselines in auto_bls_to_use, then find an RFI mask by looking for outliers after DPSS filtering.'''
# Compute int_count for all unflagged autocorrelations averaged together
int_time = 24 * 3600 * np.median(np.diff(data.times))
chan_res = np.median(np.diff(data.freqs))
int_count = int(int_time * chan_res) * len(auto_bls_to_use)
avg_auto = {(-1, -1, 'ee'): np.mean([data[bl] for bl in auto_bls_to_use], axis=0)}
# Flag RFI first with channel differences and then with DPSS
antenna_flags, _ = xrfi.flag_autos(avg_auto, int_count=int_count, nsig=(RFI_NSIG * 5))
if prior_flags is not None:
antenna_flags[(-1, -1, 'ee')] = prior_flags
_, rfi_flags = xrfi.flag_autos(avg_auto, int_count=int_count, flag_method='dpss_flagger',
flags=antenna_flags, freqs=data.freqs, filter_centers=[0],
filter_half_widths=[RFI_DPSS_HALFWIDTH], eigenval_cutoff=[1e-9], nsig=nsig)
return rfi_flags
def classify_autos_and_preliminary_rfi(auto_sum_file, auto_diff_file):
hd_sum = io.HERADataFastReader(auto_sum_file)
sum_autos, _, _ = hd_sum.read(read_flags=False, read_nsamples=False)
hd_diff = io.HERADataFastReader(auto_diff_file)
diff_autos, _, _ = hd_diff.read(read_flags=False, read_nsamples=False, dtype=np.complex64, fix_autos_func=np.real)
ants = sorted(set([ant for bl in hd_sum.bls for ant in utils.split_bl(bl)]))
# check for zeros in the evens or odds
zeros_class = ant_class.even_odd_zeros_checker(sum_autos, diff_autos, good=good_zeros_per_eo_spectrum, suspect=suspect_zeros_per_eo_spectrum)
# check for problems in auto power or slope
auto_power_class = ant_class.auto_power_checker(sum_autos, good=auto_power_good, suspect=auto_power_suspect)
auto_slope_class = ant_class.auto_slope_checker(sum_autos, good=auto_slope_good, suspect=auto_slope_suspect, edge_cut=100, filt_size=17)
overall_class = zeros_class + auto_power_class + auto_slope_class
if (len(overall_class.good_ants) + len(overall_class.suspect_ants)) == 0:
return overall_class, np.ones((len(sum_autos.times), len(sum_autos.freqs)), dtype=bool)
# find initial set of flags
antenna_flags, array_flags = xrfi.flag_autos(sum_autos, flag_method="channel_diff_flagger", nsig=RFI_NSIG * 5,
antenna_class=overall_class, flag_broadcast_thresh=.5)
for key in antenna_flags:
antenna_flags[key] = array_flags
_, array_flags = xrfi.flag_autos(sum_autos, freqs=sum_autos.freqs, flag_method="dpss_flagger",
nsig=RFI_NSIG, antenna_class=overall_class,
filter_centers=[0], filter_half_widths=[RFI_DPSS_HALFWIDTH],
eigenval_cutoff=[1e-9], flags=antenna_flags, mode='dpss_matrix',
cache=cache, flag_broadcast_thresh=.5)
# check for non-noiselike x-engine diffs
xengine_diff_class = ant_class.non_noiselike_diff_by_xengine_checker(sum_autos, diff_autos, flag_waterfall=array_flags,
antenna_class=overall_class,
xengine_chans=96, bad_xengine_zcut=BAD_XENGINE_ZCUT)
# update overall_class and return if all antennas are bad
overall_class += xengine_diff_class
if (len(overall_class.good_ants) + len(overall_class.suspect_ants)) == 0:
return overall_class, np.ones((len(sum_autos.times), len(sum_autos.freqs)), dtype=bool)
# Iteratively develop RFI mask, excess RFI classification, and autocorrelation shape classification
stage = 1
rfi_flags = np.array(array_flags)
prior_end_states = set()
while True:
# compute DPSS-filtered z-scores with current array-wide RFI mask
zscores = auto_bl_zscores(sum_autos, rfi_flags, cache=cache,
prior_class=(auto_power_class + auto_slope_class + zeros_class + xengine_diff_class))
rms = {bl: np.nanmean(zscores[bl]**2)**.5 if np.any(np.isfinite(zscores[bl])) else np.inf for bl in zscores}
# figure out which autos to use for finding new set of flags
candidate_autos = [bl for bl in sum_autos if overall_class[utils.split_bl(bl)[0]] != 'bad']
if stage == 1:
# use best half of the unflagged antennas
med_rms = np.nanmedian([rms[bl] for bl in candidate_autos])
autos_to_use = [bl for bl in candidate_autos if rms[bl] <= med_rms]
elif stage == 2:
# use all unflagged antennas which are auto RFI good, or the best half, whichever is larger
med_rms = np.nanmedian([rms[bl] for bl in candidate_autos])
best_half_autos = [bl for bl in candidate_autos if rms[bl] <= med_rms]
good_autos = [bl for bl in candidate_autos if (overall_class[utils.split_bl(bl)[0]] != 'bad')
and (auto_rfi_class[utils.split_bl(bl)[0]] == 'good')]
autos_to_use = (best_half_autos if len(best_half_autos) > len(good_autos) else good_autos)
elif stage == 3:
# use all unflagged antennas which are auto RFI good or suspect
autos_to_use = [bl for bl in candidate_autos if (overall_class[utils.split_bl(bl)[0]] != 'bad')]
# compute new RFI flags
rfi_flags = rfi_from_avg_autos(sum_autos, autos_to_use)
# perform auto shape and RFI classification
overall_class = auto_power_class + auto_slope_class + zeros_class + xengine_diff_class
auto_rfi_class = ant_class.antenna_bounds_checker(rms, good=auto_rfi_good, suspect=auto_rfi_suspect, bad=(0, np.inf))
overall_class += auto_rfi_class
auto_shape_class = ant_class.auto_shape_checker(sum_autos, good=auto_shape_good, suspect=auto_shape_suspect,
flag_spectrum=np.sum(rfi_flags, axis=0).astype(bool),
antenna_class=overall_class)
overall_class += auto_shape_class
# check for convergence by seeing whether we've previously gotten to this number of flagged antennas and channels
if stage == 3:
if (len(overall_class.bad_ants), np.sum(rfi_flags)) in prior_end_states:
break
prior_end_states.add((len(overall_class.bad_ants), np.sum(rfi_flags)))
else:
stage += 1
# return all flagged if all antennnas are bad, otherwise return overall class and rfi_flags
if (len(overall_class.good_ants) + len(overall_class.suspect_ants)) == 0:
return overall_class, np.ones((len(sum_autos.times), len(sum_autos.freqs)), dtype=bool)
else:
return overall_class, rfi_flags
classifications = {}
preliminary_rfi_flags = {}
with warnings.catch_warnings():
warnings.simplefilter("ignore")
for auto_sum_file, auto_diff_file in list(zip(auto_sums, auto_diffs)):
classifications[auto_sum_file], preliminary_rfi_flags[auto_sum_file] = classify_autos_and_preliminary_rfi(auto_sum_file, auto_diff_file)
all_ants = set([ant for auto_sum_file in classifications for ant in classifications[auto_sum_file]])
ant_flag_fracs = {ant: 0 for ant in all_ants}
for classification in classifications.values():
for ant in classification.bad_ants:
ant_flag_fracs[ant] += 1
ant_flag_fracs = {ant: ant_flag_fracs[ant] / len(classifications) for ant in all_ants}
def flag_frac_array_plot():
fig, axes = plt.subplots(1, 2, figsize=(14, 8), dpi=100, gridspec_kw={'width_ratios': [2, 1]})
def flag_frac_panel(ax, antnums, radius=7, legend=False):
ang_dict = {'Jee': (225, 405), 'Jnn': (45, 225)}
hd_sum = io.HERADataFastReader(auto_sums[-1])
xpos = np.array([hd_sum.antpos[ant[0]][0] for ant in all_ants if ant[0] in antnums])
ypos = np.array([hd_sum.antpos[ant[0]][1] for ant in all_ants if ant[0] in antnums])
scatter = ax.scatter(xpos, ypos, c='w', s=0)
for ant in all_ants:
antnum, pol = ant
if antnum in antnums:
ax.add_artist(matplotlib.patches.Wedge(tuple(hd_sum.antpos[antnum][0:2]), radius, *ang_dict[pol], color='grey'))
flag_frac = ant_flag_fracs[ant]
if flag_frac > .05:
ax.add_artist(matplotlib.patches.Wedge(tuple(hd_sum.antpos[antnum][0:2]), radius * np.sqrt(flag_frac), *ang_dict[pol], color='r'))
ax.text(hd_sum.antpos[antnum][0], hd_sum.antpos[antnum][1], str(antnum), color='w', va='center', ha='center', zorder=100)
ax.axis('equal')
ax.set_xlim([np.min(xpos) - radius * 2, np.max(xpos) + radius * 2])
ax.set_ylim([np.min(ypos) - radius * 2, np.max(ypos) + radius * 2])
ax.set_xlabel("East-West Position (meters)", size=12)
ax.set_ylabel("North-South Position (meters)", size=12)
if legend:
legend_objs = []
legend_labels = []
legend_objs.append(matplotlib.lines.Line2D([0], [0], marker='o', color='w', markeredgecolor='grey', markerfacecolor='grey', markersize=15))
unflagged_nights = lambda pol: np.sum([1 - ant_flag_fracs[ant] for ant in all_ants if ant[-1] == pol])
legend_labels.append((' \u2571\n').join([f'{unflagged_nights(pol):.1f} unflagged {pol[-1]}-polarized\nantenna-nights.' for pol in ['Jee', 'Jnn']]))
legend_objs.append(matplotlib.lines.Line2D([0], [0], marker='o', color='w', markeredgecolor='red', markerfacecolor='red', markersize=15))
unflagged_nights = lambda pol: np.sum([ant_flag_fracs[ant] for ant in all_ants if ant[-1] == pol])
legend_labels.append((' \u2571\n').join([f'{unflagged_nights(pol):.1f} flagged {pol[-1]}-polarized\nantenna-nights.' for pol in ['Jee', 'Jnn']]))
ax.legend(legend_objs, legend_labels, ncol=1, fontsize=12)
flag_frac_panel(axes[0], sorted(set([ant[0] for ant in all_ants if ant[0] < 320])), radius=7)
flag_frac_panel(axes[1], sorted(set([ant[0] for ant in all_ants if ant[0] >= 320])), radius=50, legend=True)
plt.tight_layout()
Figure 1: Preliminary Array Flag Fraction Summary¶
Per-antenna flagging fraction of data based purely on metrics that only use autocorrelations. This is likely an underestimate of flags, since it ignores low correlation, cross-polarized antennas, and high redcal $\chi^2$, among other factors.
flag_frac_array_plot()
Load and Average Unflagged Autocorrelations¶
min_flag_frac = np.min(list(ant_flag_fracs.values()))
least_flagged_ants = sorted([ant for ant in all_ants if ant_flag_fracs[ant] == min_flag_frac])
least_flagged_autos = [utils.join_bl(ant, ant) for ant in least_flagged_ants]
avg_autos = {}
times = []
for auto_sum_file in auto_sums:
with warnings.catch_warnings():
warnings.simplefilter("ignore")
hd_sum = io.HERADataFastReader(auto_sum_file)
sum_autos, _, _ = hd_sum.read(bls=least_flagged_autos, read_flags=False, read_nsamples=False)
avg_autos[auto_sum_file] = np.mean([sum_autos[bl] for bl in sum_autos], axis=0)
times.extend(hd_sum.times)
times = np.array(times)
freqs = hd_sum.freqs
flags = np.vstack([flag for flag in preliminary_rfi_flags.values()])
avg_auto = np.vstack([auto for auto in avg_autos.values()])
def flag_FM(flags, freqs, freq_range=[87.5e6, 108e6]):
'''Apply flags to all frequencies within freq_range (in Hz).'''
flags[:, np.logical_and(freqs >= freq_range[0], freqs <= freq_range[1])] = True
def flag_sun(flags, times, max_solar_alt=0):
'''Apply flags to all times where the solar altitude is greater than max_solar_alt (in degrees).'''
solar_altitudes_degrees = utils.get_sun_alt(times)
flags[solar_altitudes_degrees >= max_solar_alt, :] = True
flag_FM(flags, freqs, freq_range=FM_freq_range)
solar_flags = np.zeros_like(flags)
flag_sun(solar_flags, times, max_solar_alt=MAX_SOLAR_ALT)
flags |= solar_flags
all_flagged_times = np.all(flags, axis=1)
DPSS Filter Average Autocorrlations¶
def predict_auto_noise(auto, dt, df, nsamples=1):
'''Predict noise on an (antenna-averaged) autocorrelation. The product of Delta t and Delta f
must be unitless. For N autocorrelations averaged together, use nsamples=N.'''
int_count = int(dt * df) * nsamples
return np.abs(auto) / np.sqrt(int_count / 2)
# Figure out noise and weights
dt = np.median(np.diff(times))
int_time = 24 * 3600 * dt
chan_res = np.median(np.diff(freqs))
noise = predict_auto_noise(avg_auto, int_time, chan_res, nsamples=len(least_flagged_ants))
wgts = np.where(flags, 0, noise**-2)
# enable handling of missing file gaps
time_grid = np.arange(times[0], times[-1] + dt, dt)
time_indices = {i: np.searchsorted(time_grid, t) for i, t in enumerate(times)}
avg_auto_on_grid = np.zeros((len(time_grid), len(freqs)), dtype=float)
wgts_on_grid = np.zeros((len(time_grid), len(freqs)), dtype=float)
flags_on_grid = np.ones((len(time_grid), len(freqs)), dtype=bool)
for i in time_indices:
avg_auto_on_grid[time_indices[i], :] = avg_auto[i, :]
wgts_on_grid[time_indices[i], :] = wgts[i, :]
flags_on_grid[time_indices[i], :] = flags[i, :]
all_flagged_times_on_grid = np.all(flags_on_grid, axis=1)
time_filters, freq_filters = dpss_filters(freqs=freqs, # Hz
times=time_grid, # JD
freq_scale=FREQ_FILTER_SCALE,
time_scale=TIME_FILTER_SCALE,
eigenval_cutoff=EIGENVAL_CUTOFF)
model, fit_info = solve_2D_DPSS(avg_auto_on_grid, wgts_on_grid, time_filters, freq_filters, method='lu_solve')
An NVIDIA GPU may be present on this machine, but a CUDA-enabled jaxlib is not installed. Falling back to cpu.
noise_model = predict_auto_noise(np.abs(model), int_time, chan_res, nsamples=len(least_flagged_ants))
zscore = ((avg_auto_on_grid - model) / noise_model).real
def plot_z_score(flags, zscore):
plt.figure(figsize=(14,10), dpi=100)
extent = [freqs[0]/1e6, freqs[-1]/1e6, times[-1] - int(times[0]), times[0] - int(times[0])]
plt.imshow(np.where(flags, np.nan, zscore.real), aspect='auto', cmap='bwr', interpolation='none', vmin=-SMOOTHED_ABS_Z_THRESH, vmax=SMOOTHED_ABS_Z_THRESH, extent=extent)
plt.colorbar(location='top', label='z score', extend='both')
plt.xlabel('Frequency (MHz)')
plt.ylabel(f'JD - {int(times[0])}')
plt.tight_layout()
Figure 2: z-Score of DPSS-Filtered, Averaged Good Autocorrelation and Initial Flags¶
This plot shows the z-score of a DPSS-filtered, deeply averaged autocorrelation, where the noise is inferred from the integration time, channel width, and DPSS model. DPSS was performed using the per-file RFI flagging analogous to that used in the file_calibration notebook, which is generally insensitive to broadband RFI.
plot_z_score(flags_on_grid, zscore)
Find Bad Time Ranges¶
# Propose flags for night times when there's too much temporal structure due to, e.g. lightning
sigma = TIME_FILTER_SCALE / int_time
metric = np.nanmean(np.where(flags_on_grid, np.nan, np.abs(zscore)), axis=1)
kernel = np.exp(-np.arange(-len(metric) // 2, len(metric) // 2 + 1)**2 / 2 / sigma**2)
kernel /= np.sum(kernel)
convolved_metric = np.full_like(metric, np.nan)
convolved_metric[~all_flagged_times_on_grid] = convolve(metric[~all_flagged_times_on_grid], kernel, mode='reflect')
apriori_time_flags = np.ones_like(metric, dtype=bool)
apriori_time_flags[~all_flagged_times_on_grid] = metric_convolution_flagging(metric[~all_flagged_times_on_grid], convolved_metric[~all_flagged_times_on_grid] >= SMOOTHED_ABS_Z_THRESH,
[0, SMOOTHED_ABS_Z_THRESH], sigma=(TIME_FILTER_SCALE / int_time), max_flag_gap=0)
Mean of empty slice
# Flag whole day if too much of the day is flagged
apriori_flag_frac = np.mean(apriori_time_flags[~all_flagged_times_on_grid])
if apriori_flag_frac > WHOLE_DAY_FLAG_THRESH:
print(f'A priori time flag fraction of {apriori_flag_frac:.2%} is greater than {WHOLE_DAY_FLAG_THRESH:.2%}... Flagging whole day.')
apriori_time_flags = np.ones_like(apriori_time_flags)
def apriori_flag_plot():
plt.figure(figsize=(14, 6))
plt.semilogy(time_grid - int(time_grid[0]), np.nanmean(np.where(flags_on_grid, np.nan, np.abs(zscore)), axis=1), label='Average |z| Over Frequency')
plt.semilogy(time_grid - int(time_grid[0]), convolved_metric, label=f'Convolved on {TIME_FILTER_SCALE}-second timescale')
plt.axhline(SMOOTHED_ABS_Z_THRESH, color='k', ls='--', label='Threshold on Convolved |z|')
for i, apf in enumerate(true_stretches(apriori_time_flags)):
plt.axvspan((time_grid - int(time_grid[0]))[apf.start], (time_grid - int(time_grid[0]))[apf.stop - 1], color='r', zorder=0, alpha=.3,
label=('Proposed A Priori Flags' if i == 0 else None))
plt.legend()
plt.xlabel(f'JD - {int(time_grid[0])}')
plt.ylabel('Frequency Averaged |z-score|')
plt.tight_layout()
Figure 3: Proposed A Priori Time Flags Based on Frequency-Averaged and Convolved z-Score Magnitude¶
This plot shows the average (over frequency) magnitude of z-scores as a function of time. This metric is smoothed to pick out ranges of times where the DPSS residual reveals persistent temporal structure. Flags due to the sun being above the horizon are also shown. The unflagged range of times is required to be contiguous.
apriori_flag_plot()
Mean of empty slice
Write a priori flags to a yaml¶
Also writing as a priori flags channels that are 100% flagged and antennas that are 100% flagged.
dt = int_time / 3600 / 24 / 2
out_yml_str = 'JD_flags: ' + str([[time_grid[apf][0] - dt / 2, time_grid[apf][-1] + dt / 2] for apf in true_stretches(apriori_time_flags)])
out_yml_str += '\n\nfreq_flags: ' + str([[freqs[apf][0] - chan_res / 2, freqs[apf][-1] + chan_res / 2] for apf in true_stretches(np.all(flags, axis=0))])
out_yml_str += '\n\nex_ants: ' + str(sorted([ant for ant in all_ants if ant_flag_fracs[ant] == 1])).replace("'", "").replace('(', '[').replace(')', ']')
out_yml_str += '\n\nall_ant: ' + str(sorted(all_ants)).replace("'", "").replace('(', '[').replace(')', ']')
out_yml_str += f'\n\njd_range: [{times.min()}, {times.max()}]'
out_yml_str += f'\n\nfreq_range: [{freqs.min()}, {freqs.max()}]'
print(f'Writing the following to {out_yaml_file}\n' + '-' * (25 + len(out_yaml_file)))
print(out_yml_str)
with open(out_yaml_file, 'w') as outfile:
outfile.writelines(out_yml_str)
Writing the following to /mnt/sn1/data2/2460538/2460538_apriori_flags.yaml -------------------------------------------------------------------------- JD_flags: [[2460538.169133933, 2460538.17019649]] freq_flags: [[62484741.2109375, 62606811.5234375], [87509155.2734375, 108016967.7734375], [124984741.2109375, 125106811.5234375], [141464233.3984375, 141586303.7109375], [142074584.9609375, 142318725.5859375], [147445678.7109375, 147567749.0234375], [187484741.2109375, 187606811.5234375]] ex_ants: [[8, Jee], [8, Jnn], [15, Jnn], [18, Jee], [18, Jnn], [22, Jnn], [27, Jee], [27, Jnn], [28, Jee], [31, Jnn], [33, Jnn], [34, Jee], [45, Jee], [46, Jee], [47, Jee], [47, Jnn], [61, Jee], [61, Jnn], [63, Jee], [63, Jnn], [64, Jee], [64, Jnn], [73, Jnn], [77, Jee], [77, Jnn], [78, Jee], [78, Jnn], [86, Jnn], [87, Jee], [89, Jee], [89, Jnn], [91, Jee], [91, Jnn], [97, Jee], [97, Jnn], [104, Jnn], [105, Jee], [105, Jnn], [106, Jee], [106, Jnn], [108, Jee], [108, Jnn], [109, Jnn], [120, Jee], [124, Jee], [124, Jnn], [125, Jee], [125, Jnn], [126, Jee], [126, Jnn], [132, Jee], [132, Jnn], [134, Jee], [134, Jnn], [161, Jnn], [170, Jee], [171, Jnn], [176, Jee], [176, Jnn], [177, Jee], [177, Jnn], [178, Jee], [178, Jnn], [179, Jee], [179, Jnn], [180, Jnn], [182, Jee], [199, Jnn], [200, Jee], [201, Jnn], [218, Jnn], [232, Jee], [241, Jee], [241, Jnn], [242, Jee], [242, Jnn], [243, Jee], [243, Jnn], [251, Jee], [272, Jee], [272, Jnn], [325, Jee], [325, Jnn], [329, Jee], [329, Jnn], [332, Jee], [332, Jnn], [333, Jee], [333, Jnn]] all_ant: [[3, Jee], [3, Jnn], [4, Jee], [4, Jnn], [5, Jee], [5, Jnn], [7, Jee], [7, Jnn], [8, Jee], [8, Jnn], [9, Jee], [9, Jnn], [10, Jee], [10, Jnn], [15, Jee], [15, Jnn], [16, Jee], [16, Jnn], [17, Jee], [17, Jnn], [18, Jee], [18, Jnn], [19, Jee], [19, Jnn], [20, Jee], [20, Jnn], [21, Jee], [21, Jnn], [22, Jee], [22, Jnn], [27, Jee], [27, Jnn], [28, Jee], [28, Jnn], [29, Jee], [29, Jnn], [30, Jee], [30, Jnn], [31, Jee], [31, Jnn], [32, Jee], [32, Jnn], [33, Jee], [33, Jnn], [34, Jee], [34, Jnn], [35, Jee], [35, Jnn], [36, Jee], [36, Jnn], [37, Jee], [37, Jnn], [38, Jee], [38, Jnn], [40, Jee], [40, Jnn], [41, Jee], [41, Jnn], [42, Jee], [42, Jnn], [45, Jee], [45, Jnn], [46, Jee], [46, Jnn], [47, Jee], [47, Jnn], [48, Jee], [48, Jnn], [49, Jee], [49, Jnn], [50, Jee], [50, Jnn], [51, Jee], [51, Jnn], [52, Jee], [52, Jnn], [53, Jee], [53, Jnn], [54, Jee], [54, Jnn], [55, Jee], [55, Jnn], [56, Jee], [56, Jnn], [57, Jee], [57, Jnn], [61, Jee], [61, Jnn], [62, Jee], [62, Jnn], [63, Jee], [63, Jnn], [64, Jee], [64, Jnn], [65, Jee], [65, Jnn], [66, Jee], [66, Jnn], [67, Jee], [67, Jnn], [68, Jee], [68, Jnn], [69, Jee], [69, Jnn], [70, Jee], [70, Jnn], [71, Jee], [71, Jnn], [72, Jee], [72, Jnn], [73, Jee], [73, Jnn], [77, Jee], [77, Jnn], [78, Jee], [78, Jnn], [79, Jee], [79, Jnn], [80, Jee], [80, Jnn], [81, Jee], [81, Jnn], [82, Jee], [82, Jnn], [83, Jee], [83, Jnn], [84, Jee], [84, Jnn], [85, Jee], [85, Jnn], [86, Jee], [86, Jnn], [87, Jee], [87, Jnn], [88, Jee], [88, Jnn], [89, Jee], [89, Jnn], [90, Jee], [90, Jnn], [91, Jee], [91, Jnn], [92, Jee], [92, Jnn], [93, Jee], [93, Jnn], [94, Jee], [94, Jnn], [95, Jee], [95, Jnn], [96, Jee], [96, Jnn], [97, Jee], [97, Jnn], [98, Jee], [98, Jnn], [99, Jee], [99, Jnn], [100, Jee], [100, Jnn], [101, Jee], [101, Jnn], [102, Jee], [102, Jnn], [103, Jee], [103, Jnn], [104, Jee], [104, Jnn], [105, Jee], [105, Jnn], [106, Jee], [106, Jnn], [107, Jee], [107, Jnn], [108, Jee], [108, Jnn], [109, Jee], [109, Jnn], [110, Jee], [110, Jnn], [111, Jee], [111, Jnn], [112, Jee], [112, Jnn], [113, Jee], [113, Jnn], [114, Jee], [114, Jnn], [115, Jee], [115, Jnn], [116, Jee], [116, Jnn], [117, Jee], [117, Jnn], [118, Jee], [118, Jnn], [119, Jee], [119, Jnn], [120, Jee], [120, Jnn], [121, Jee], [121, Jnn], [122, Jee], [122, Jnn], [123, Jee], [123, Jnn], [124, Jee], [124, Jnn], [125, Jee], [125, Jnn], [126, Jee], [126, Jnn], [127, Jee], [127, Jnn], [128, Jee], [128, Jnn], [129, Jee], [129, Jnn], [130, Jee], [130, Jnn], [131, Jee], [131, Jnn], [132, Jee], [132, Jnn], [133, Jee], [133, Jnn], [134, Jee], [134, Jnn], [135, Jee], [135, Jnn], [136, Jee], [136, Jnn], [137, Jee], [137, Jnn], [138, Jee], [138, Jnn], [139, Jee], [139, Jnn], [140, Jee], [140, Jnn], [141, Jee], [141, Jnn], [142, Jee], [142, Jnn], [143, Jee], [143, Jnn], [144, Jee], [144, Jnn], [145, Jee], [145, Jnn], [146, Jee], [146, Jnn], [147, Jee], [147, Jnn], [148, Jee], [148, Jnn], [149, Jee], [149, Jnn], [150, Jee], [150, Jnn], [151, Jee], [151, Jnn], [152, Jee], [152, Jnn], [153, Jee], [153, Jnn], [154, Jee], [154, Jnn], [155, Jee], [155, Jnn], [159, Jee], [159, Jnn], [160, Jee], [160, Jnn], [161, Jee], [161, Jnn], [162, Jee], [162, Jnn], [163, Jee], [163, Jnn], [164, Jee], [164, Jnn], [165, Jee], [165, Jnn], [166, Jee], [166, Jnn], [167, Jee], [167, Jnn], [168, Jee], [168, Jnn], [169, Jee], [169, Jnn], [170, Jee], [170, Jnn], [171, Jee], [171, Jnn], [172, Jee], [172, Jnn], [173, Jee], [173, Jnn], [174, Jee], [174, Jnn], [175, Jee], [175, Jnn], [176, Jee], [176, Jnn], [177, Jee], [177, Jnn], [178, Jee], [178, Jnn], [179, Jee], [179, Jnn], [180, Jee], [180, Jnn], [181, Jee], [181, Jnn], [182, Jee], [182, Jnn], [183, Jee], [183, Jnn], [184, Jee], [184, Jnn], [185, Jee], [185, Jnn], [186, Jee], [186, Jnn], [187, Jee], [187, Jnn], [188, Jee], [188, Jnn], [189, Jee], [189, Jnn], [190, Jee], [190, Jnn], [191, Jee], [191, Jnn], [192, Jee], [192, Jnn], [193, Jee], [193, Jnn], [194, Jee], [194, Jnn], [195, Jee], [195, Jnn], [196, Jee], [196, Jnn], [197, Jee], [197, Jnn], [198, Jee], [198, Jnn], [199, Jee], [199, Jnn], [200, Jee], [200, Jnn], [201, Jee], [201, Jnn], [202, Jee], [202, Jnn], [204, Jee], [204, Jnn], [205, Jee], [205, Jnn], [206, Jee], [206, Jnn], [207, Jee], [207, Jnn], [208, Jee], [208, Jnn], [209, Jee], [209, Jnn], [210, Jee], [210, Jnn], [211, Jee], [211, Jnn], [212, Jee], [212, Jnn], [213, Jee], [213, Jnn], [214, Jee], [214, Jnn], [215, Jee], [215, Jnn], [216, Jee], [216, Jnn], [217, Jee], [217, Jnn], [218, Jee], [218, Jnn], [220, Jee], [220, Jnn], [221, Jee], [221, Jnn], [222, Jee], [222, Jnn], [223, Jee], [223, Jnn], [224, Jee], [224, Jnn], [225, Jee], [225, Jnn], [226, Jee], [226, Jnn], [227, Jee], [227, Jnn], [228, Jee], [228, Jnn], [229, Jee], [229, Jnn], [231, Jee], [231, Jnn], [232, Jee], [232, Jnn], [233, Jee], [233, Jnn], [234, Jee], [234, Jnn], [235, Jee], [235, Jnn], [237, Jee], [237, Jnn], [238, Jee], [238, Jnn], [239, Jee], [239, Jnn], [240, Jee], [240, Jnn], [241, Jee], [241, Jnn], [242, Jee], [242, Jnn], [243, Jee], [243, Jnn], [244, Jee], [244, Jnn], [245, Jee], [245, Jnn], [246, Jee], [246, Jnn], [250, Jee], [250, Jnn], [251, Jee], [251, Jnn], [252, Jee], [252, Jnn], [253, Jee], [253, Jnn], [255, Jee], [255, Jnn], [256, Jee], [256, Jnn], [261, Jee], [261, Jnn], [262, Jee], [262, Jnn], [266, Jee], [266, Jnn], [267, Jee], [267, Jnn], [268, Jee], [268, Jnn], [269, Jee], [269, Jnn], [270, Jee], [270, Jnn], [272, Jee], [272, Jnn], [281, Jee], [281, Jnn], [282, Jee], [282, Jnn], [283, Jee], [283, Jnn], [285, Jee], [285, Jnn], [295, Jee], [295, Jnn], [320, Jee], [320, Jnn], [321, Jee], [321, Jnn], [323, Jee], [323, Jnn], [324, Jee], [324, Jnn], [325, Jee], [325, Jnn], [326, Jee], [326, Jnn], [327, Jee], [327, Jnn], [328, Jee], [328, Jnn], [329, Jee], [329, Jnn], [331, Jee], [331, Jnn], [332, Jee], [332, Jnn], [333, Jee], [333, Jnn], [336, Jee], [336, Jnn], [340, Jee], [340, Jnn]] jd_range: [2460538.169161895, 2460538.520476798] freq_range: [46920776.3671875, 234298706.0546875]
# check output yaml file
flagged_indices = metrics_io.read_a_priori_int_flags(out_yaml_file, time_grid)
for i, apf in enumerate(apriori_time_flags):
if i in flagged_indices:
assert apf
else:
assert not apf
flagged_chans = metrics_io.read_a_priori_chan_flags(out_yaml_file, freqs=freqs)
for chan in range(len(freqs)):
if chan in flagged_chans:
assert np.all(flags[:, chan])
else:
assert not np.all(flags[:, chan])
flagged_ants = metrics_io.read_a_priori_ant_flags(out_yaml_file)
for ant in all_ants:
if ant_flag_fracs[ant] == 1:
assert ant in flagged_ants
else:
assert ant not in flagged_ants
Metadata¶
for repo in ['hera_cal', 'hera_qm', 'hera_filters', 'hera_notebook_templates', 'pyuvdata']:
exec(f'from {repo} import __version__')
print(f'{repo}: {__version__}')
hera_cal: 3.6.dev110+gcc0a13d hera_qm: 2.1.5.dev6+g23b7cf7 hera_filters: 0.1.5
hera_notebook_templates: 0.1.dev734+g90f16f4 pyuvdata: 2.4.2
print(f'Finished execution in {(time.time() - tstart) / 60:.2f} minutes.')
Finished execution in 384.61 minutes.