Second Round of Full Day RFI Flagging¶
by Josh Dillon, last updated July 31, 2023
This notebook synthesizes information from individual delay_filtered_average_zscore notebooks to find low-level RFI and flag it. That notebook takes `smooth_cal`ibrated data, redundantly averages it, performs a high-pass delay filter, and then incoherently averages across baselines, creating a per-polarization z-score. This notebook then takes that whole night of z-scores and finds a new set of flags, which are both added to the `smooth_cal` files (updated in place) and written out as new `UVFlag` waterfall-type `.h5` files.
Here's a set of links to skip to particular figures and tables:
• Figure 1: Waterfall of Maximum z-Score of Either Polarization Before Round 2 Flagging¶
• Figure 2: Histogram of z-scores¶
• Figure 3: Waterfall of Maximum z-Score of Either Polarization After Round 2 Flagging¶
• Figure 4: Spectra of Time-Averaged z-Scores¶
• Figure 5: Summary of Flags Before and After Round 2 Flagging¶
import time
# wall-clock start of the notebook run (presumably reported at the end of the notebook; not visible here)
tstart = time.time()
import os
# disable HDF5 file locking; this is set before h5py is imported on the next line
os.environ['HDF5_USE_FILE_LOCKING'] = 'FALSE'
import h5py
import hdf5plugin # REQUIRED to have the compression plugins available
import numpy as np
import glob
import matplotlib.pyplot as plt
import matplotlib
import copy
import warnings
from pyuvdata import UVFlag, UVCal
from hera_cal import utils
from hera_qm import xrfi
from hera_qm.time_series_metrics import true_stretches
from IPython.display import display, HTML
%matplotlib inline
# widen the notebook container to the full browser width
display(HTML("<style>.container { width:100% !important; }</style>"))
_ = np.seterr(all='ignore') # get rid of red warnings
# render inline figures in 'retina' format
%config InlineBackend.figure_format = 'retina'
# get input data file names
SUM_FILE = os.environ.get("SUM_FILE", None)
# SUM_FILE = '/lustre/aoc/projects/hera/h6c-analysis/IDR2/2459861/zen.2459861.25297.sum.uvh5'
SUM_SUFFIX = os.environ.get("SUM_SUFFIX", 'sum.uvh5')
if SUM_FILE is None:
    # fail early with an actionable message instead of an opaque
    # "'NoneType' object has no attribute 'split'" from the glob construction below
    raise ValueError('SUM_FILE is not set: export it in the environment or hard-code it above.')

# get input and output suffixes
SMOOTH_CAL_SUFFIX = os.environ.get("SMOOTH_CAL_SUFFIX", 'sum.smooth.calfits')
ZSCORE_SUFFIX = os.environ.get("ZSCORE_SUFFIX", 'sum.red_avg_zscore.h5')
FLAG_WATERFALL2_SUFFIX = os.environ.get("FLAG_WATERFALL2_SUFFIX", 'sum.flag_waterfall_round_2.h5')
OUT_YAML_SUFFIX = os.environ.get("OUT_YAML_SUFFIX", '_aposteriori_flags.yaml')
OUT_YAML_DIR = os.environ.get("OUT_YAML_DIR", None)

# build globs: drop the last three dot-separated tokens of SUM_FILE (e.g. '25297', 'sum', 'uvh5' in the
# example above) and wildcard the first of them, so that every file sharing the prefix is matched
glob_prefix = '.'.join(SUM_FILE.split('.')[:-3]) + '.*.'
sum_glob = glob_prefix + SUM_SUFFIX
# build sibling globs directly from the prefix rather than with str.replace(), which would also
# rewrite any occurrence of SUM_SUFFIX that happens to appear in the directory part of the path
cal_files_glob = glob_prefix + SMOOTH_CAL_SUFFIX
zscore_glob = glob_prefix + ZSCORE_SUFFIX

# build out yaml file, named after the 4th-from-last token of SUM_FILE ('2459861' in the example above)
if OUT_YAML_DIR is None:
    OUT_YAML_DIR = os.path.dirname(SUM_FILE)
out_yaml_file = os.path.join(OUT_YAML_DIR, SUM_FILE.split('.')[-4] + OUT_YAML_SUFFIX)

# get flagging parameters
Z_THRESH = float(os.environ.get("Z_THRESH", 5))
WS_Z_THRESH = float(os.environ.get("WS_Z_THRESH", 4))
AVG_Z_THRESH = float(os.environ.get("AVG_Z_THRESH", 1))
MAX_FREQ_FLAG_FRAC = float(os.environ.get("MAX_FREQ_FLAG_FRAC", .25))
MAX_TIME_FLAG_FRAC = float(os.environ.get("MAX_TIME_FLAG_FRAC", .1))

# echo the settings actually in use; a plain namespace lookup replaces eval()
for setting in ['Z_THRESH', 'WS_Z_THRESH', 'AVG_Z_THRESH', 'MAX_FREQ_FLAG_FRAC', 'MAX_TIME_FLAG_FRAC']:
    print(f'{setting} = {globals()[setting]}')
Z_THRESH = 5.0 WS_Z_THRESH = 4.0 AVG_Z_THRESH = 1.0 MAX_FREQ_FLAG_FRAC = 0.25 MAX_TIME_FLAG_FRAC = 0.1
Load z-scores¶
# load z-scores
zscore_files = sorted(glob.glob(zscore_glob))
if not zscore_files:
    # without this guard, zscore_files[0] below raises a bare IndexError that hides the real problem
    raise FileNotFoundError(f'No *.{ZSCORE_SUFFIX} files found matching {zscore_glob}')
print(f'Found {len(zscore_files)} *.{ZSCORE_SUFFIX} files starting with {zscore_files[0]}.')
# read all of the matched z-score files into a single UVFlag object
uvf = UVFlag(zscore_files, use_future_array_shapes=True)
Found 1572 *.sum.red_avg_zscore.h5 files starting with /mnt/sn1/data1/2460457/zen.2460457.16886.sum.red_avg_zscore.h5.
# get calibration solution files
cal_files = sorted(glob.glob(cal_files_glob))
if not cal_files:
    # without this guard, cal_files[0] below raises a bare IndexError that hides the real problem
    raise FileNotFoundError(f'No *.{SMOOTH_CAL_SUFFIX} files found matching {cal_files_glob}')
print(f'Found {len(cal_files)} *.{SMOOTH_CAL_SUFFIX} files starting with {cal_files[0]}.')
Found 1572 *.sum.smooth.calfits files starting with /mnt/sn1/data1/2460457/zen.2460457.16886.sum.smooth.calfits.
# the two sorted file lists are expected to have the same length (presumably one calibration file
# per z-score file — confirm against the cells that write the flags back)
assert len(zscore_files) == len(cal_files), (
    f'Found {len(zscore_files)} z-score files but {len(cal_files)} calibration files.'
)
# extract z-scores and correct by a single number per polarization to account for biases created by filtering
def _debiased_zscore(pol):
    """Return uvf's metric waterfall for polarization string `pol`, minus its own NaN-ignoring median."""
    pol_num = utils.polstr2num(pol, x_orientation=uvf.x_orientation)
    # position of this polarization along the last axis of the metric array
    pol_index = np.argwhere(uvf.polarization_array == pol_num)[0][0]
    waterfall = uvf.metric_array[:, :, pol_index]
    return waterfall - np.nanmedian(waterfall)


zscore = {pol: _debiased_zscore(pol) for pol in ['ee', 'nn']}

# axes of the waterfalls and the matching imshow extent: frequency in MHz from left to right,
# time as a fractional JD offset with the first integration at the top
freqs = uvf.freq_array
times = uvf.time_array
jd_offset = int(times[0])
extent = [
    freqs[0] / 1e6,
    freqs[-1] / 1e6,
    times[-1] - jd_offset,
    times[0] - jd_offset,
]
def plot_max_z_score(zscore, flags=None):
    """Plot a waterfall of the larger of the 'ee' and 'nn' z-scores at every pixel.

    Parameters
    ----------
    zscore : dict
        Maps polarization strings to equally-shaped z-score arrays; the 'ee' and 'nn'
        entries are the ones plotted. Arrays are assumed to be 2D with time along the
        first axis and frequency along the second, matching the axis labels.
    flags : array of bool, optional
        Pixels to blank out (drawn as NaN). If None, a pixel is blanked when any of
        the arrays in `zscore` is non-finite there.

    Notes
    -----
    Uses the notebook-level `extent` and `times` for the axis ranges and the y-label.
    """
    if flags is None:
        flags = np.any(~np.isfinite(list(zscore.values())), axis=0)

    # pixels that are NaN in both polarizations make nanmax emit an "All-NaN ... encountered"
    # RuntimeWarning; the result there is NaN either way, so silence just that warning locally
    with warnings.catch_warnings():
        warnings.filterwarnings('ignore', message='All-NaN', category=RuntimeWarning)
        max_z = np.nanmax([zscore['ee'], zscore['nn']], axis=0)

    plt.figure(figsize=(14,10), dpi=100)
    plt.imshow(np.where(flags, np.nan, max_z), aspect='auto',
               cmap='coolwarm', interpolation='none', vmin=-10, vmax=10, extent=extent)
    plt.colorbar(location='top', label='Max z-score of either polarization', extend='both', aspect=40, pad=.02)
    plt.xlabel('Frequency (MHz)')
    plt.ylabel(f'JD - {int(times[0])}')
    plt.tight_layout()
Figure 1: Waterfall of Maximum z-Score of Either Polarization Before Round 2 Flagging¶
Shows the worse of the two results from delay_filtered_average_zscore from either polarization. Dips near flagged channels are expected, due to overfitting of noise. Positive-going excursions are problematic and likely evidence of RFI.
# Figure 1: no flags passed, so only pixels that are non-finite in either polarization are blanked
plot_max_z_score(zscore)
All-NaN axis encountered