Full Day Systematics Inspection¶
by Tyler Cox and Josh Dillon, last updated May 10, 2024
This notebooks is designed to inspect the full day delay spectra for a small number of redundantly-averaged baselines, by inpainting in frequency and time, and performing delay filtering, cross-talk filtering, and main-beam fringe rate filtering. It is based on a similar notebook written by Josh Dillon for H1C analysis.
More specifically, it looks at 1-unit (i.e. 14.6 m) EW baselines, 2-unit EW baselines, 4-unit EW baselines, and one inter-sector baseline. This is done for both polarizations (and eventually for Pseudo-Stokes I and Q). We look at delay-spectrum waterfalls vs. LST, delay-spectrum waterfalls vs. fringe-rate, and averaged delay-spectra for these baselines. Those spectra are plotted for two wide bands, one below FM and one above FM.
This notebook is divided into four sections:
• Section 1: Time + Frequency Inpainting of Redundantly Averaged Calibrated Visibilities¶
• Section 2: Delay-Filter and Cross-Talk Filter Redundantly-Averaged Visibilities¶
• Section 3: Form Psuedo-Stokes I + Q¶
• Section 4: Main-Beam Fringe-Rate Filter¶
import os
os.environ['HDF5_USE_FILE_LOCKING'] = 'FALSE'
import h5py
import hdf5plugin # REQUIRED to have the compression plugins available
import glob
import uvtools
import matplotlib
import numpy as np
import matplotlib.pyplot as plt
from copy import deepcopy
from pyuvdata import uvbeam, uvdata
from hera_filters import dspec
from hera_cal import io, frf, vis_clean, redcal
from hera_cal.datacontainer import DataContainer, RedDataContainer
from IPython.display import display, HTML
%matplotlib inline
display(HTML("<style>.container { width:100% !important; }</style>"))
_ = np.seterr(all='ignore') # get rid of red warnings
%config InlineBackend.figure_format = 'retina'
# Get file names
SUM_FILE = os.environ.get("SUM_FILE", None)
#SUM_FILE = '/lustre/aoc/projects/hera/h6c-analysis/IDR2/2459866/zen.2459869.25282.sum.uvh5'
RED_AVG_SUFFIX = os.environ.get("RED_AVG_SUFFIX", "sum.smooth_calibrated.red_avg.uvh5")
# Arguements for delay-filtering, fringe-rate filtering, cross-talk subtraction, and inpainting
XTALK_FR = float(os.environ.get("XTALK_FR", 0.025)) # Fringe rate half-width used for fringe rate filtering
FILT_MIN_DLY = float(os.environ.get("FILT_MIN_DLY", 150.)) # minimum delay for delay-filter in units of ns
INPAINT_MIN_DLY = float(os.environ.get("INPAINT_MIN_DLY", 500.)) # minimum delay for inpainting in units of ns
INPAINT_REGULARIZATION = float(os.environ.get("INPAINT_REGULARIZATION", 1e-5)) # reasonable values are between 1e-2 and 1e-5
STANDOFF = float(os.environ.get("STANDOFF", 50.)) # additional standoff added to min_dly for delay-filter in units of ns
INPAINT_FR = float(os.environ.get("INPAINT_FR", 2.5)) # Fringe-rate half-width in mHz for time inpainting
EIGENVAL_CUTOFF = float(os.environ.get("EIGENVAL_CUTOFF", 1e-12))
# Flag settings to get delay transform regions
FM_LOW_FREQ = float(os.environ.get("FM_LOW_FREQ", 87.5)) # in MHz
FM_HIGH_FREQ = float(os.environ.get("FM_HIGH_FREQ", 108.0)) # in MHz
# Maximum number of contiguous frequency channels in a time integration to attempt to inpaint over
# If number of contiguous flags is greater than this value, flagged channels will remain flagged
# and will attempt to be inpainted over along the time axis
MAX_CONTIGUOUS_FLAGS = int(os.environ.get("MAX_CONTIGUOUS_FLAGS", 20))
# Remove this number of channels on either side of the below/above FM bands to exclude from delay spectrum plots
SPECTRUM_CHAN_BUFFER = int(os.environ.get("SPECTRUM_CHAN_BUFFER", 25))
# Is the input LST-binned
IS_LST_BINNED = os.environ.get("IS_LST_BINNED", "FALSE").upper() == "TRUE"
for setting in ['XTALK_FR', 'FILT_MIN_DLY', 'INPAINT_MIN_DLY', 'STANDOFF', 'INPAINT_FR', 'EIGENVAL_CUTOFF',
'FM_LOW_FREQ', 'FM_HIGH_FREQ', 'MAX_CONTIGUOUS_FLAGS', 'SPECTRUM_CHAN_BUFFER', 'IS_LST_BINNED']:
print(f'{setting} = {eval(setting)}')
XTALK_FR = 0.025 FILT_MIN_DLY = 150.0 INPAINT_MIN_DLY = 500.0 STANDOFF = 50.0 INPAINT_FR = 2.5 EIGENVAL_CUTOFF = 1e-12 FM_LOW_FREQ = 87.5 FM_HIGH_FREQ = 108.0 MAX_CONTIGUOUS_FLAGS = 20 SPECTRUM_CHAN_BUFFER = 25 IS_LST_BINNED = False
sum_glob = os.path.join(os.path.dirname(SUM_FILE), "*" + RED_AVG_SUFFIX)
red_avg_files = sorted(glob.glob(sum_glob))
print(f'Found {len(red_avg_files)} *.{RED_AVG_SUFFIX} files starting with {red_avg_files[0]}.')
Found 1572 *.sum.smooth_calibrated.red_avg.uvh5 files starting with /mnt/sn1/data2/2460446/zen.2460446.16879.sum.smooth_calibrated.red_avg.uvh5.
# Select baselines for plotting
for i in range(len(red_avg_files) // 2):
hd = io.HERADataFastReader(red_avg_files[(len(red_avg_files) // 2) + i])
bls_to_plot = []
# Find 1, 2, and 4 units EW baselines
for bl in hd.bls:
bl_vec = (hd.antpos[bl[1]] - hd.antpos[bl[0]])
if (np.abs(bl_vec[1]) < 1) and int(np.round(bl_vec[0] / 14.6)) in [1, 2, 4]:
bls_to_plot.append(bl)
# Get intersector baseline
_, _, nsamples = hd.read()
nsamples = RedDataContainer(nsamples, antpos=hd.antpos)
# try a bunch of possible intersector baselines, hoping one will work
for ant2 in range(226, 233):
if (np.median(nsamples[(144, ant2, 'nn')]) > 0) & (np.median(nsamples[(144, ant2, 'ee')]) > 1):
bls_to_plot.append(nsamples.get_ubl_key((144, ant2, 'nn')))
bls_to_plot.append(nsamples.get_ubl_key((144, ant2, 'ee')))
break
if len(bls_to_plot) >= 8:
break
# Identify indices
FM_LOW_FREQ_IND = np.argmin(np.abs(hd.freqs - FM_LOW_FREQ * 1e6))
FM_HIGH_FREQ_IND = np.argmin(np.abs(hd.freqs - FM_HIGH_FREQ * 1e6))
hd = io.HERADataFastReader(red_avg_files)
d, f, n = hd.read(pols=['nn', 'ee'], bls=bls_to_plot)
# Create a truncated version of the data to deal with edge times that are flagged
data = deepcopy(d)
flags = deepcopy(f)
nsamples = deepcopy(n)
# Identify unflagged edge channels
unflagged_chans = np.where(~np.all(flags[bls_to_plot[0]], axis=1))[0]
inds = np.arange(np.min(unflagged_chans), np.max(unflagged_chans) + 1)
# Fix data
for bl in data:
data[bl] = d[bl][inds]
flags[bl] = f[bl][inds]
nsamples[bl] = n[bl][inds]
# Fix metadata
data.times = d.times[inds]
data.lsts = d.lsts[inds]
Section 1: Time + Frequency Inpainting of Redundantly Averaged Calibrated Visibilities¶
unit = int(np.round(np.linalg.norm(data.antpos[bls_to_plot[0][1]] - data.antpos[bls_to_plot[0][0]]) / 14.6, 1))
display(HTML(f'<h2>Figure 1: Amplitude and Phase of Calibrated {unit}-unit EW Redundantly-Averaged Baseline</h2>'))
# Get the LSTs from the data
lsts_unwrapped = np.where(data.lsts > data.lsts[-1], data.lsts - 2 * np.pi, data.lsts) * 12 / np.pi # in hours
extent = [data.freqs[0] / 1e6, data.freqs[-1] / 1e6, lsts_unwrapped[-1], lsts_unwrapped[0]]
if not IS_LST_BINNED:
fig, axs = plt.subplots(2, 2, figsize=(18, 14), sharex=True, sharey=True, gridspec_kw={'hspace': .03, 'wspace': .03})
else:
fig, axs = plt.subplots(2, 3, figsize=(18, 14), sharex=True, sharey=True, gridspec_kw={'hspace': .03, 'wspace': .03})
fig.subplots_adjust(hspace=0.1, wspace=0.1)
# Plot the amplitude of ee and nn
im1 = axs[0, 0].imshow(np.where(flags[bls_to_plot[0]], np.nan, np.abs(data[bls_to_plot[0]])), aspect='auto', interpolation='none',
norm=matplotlib.colors.LogNorm(), cmap='inferno', extent=extent)
axs[1, 0].imshow(np.where(flags[bls_to_plot[1]], np.nan, np.abs(data[bls_to_plot[1]])), aspect='auto', interpolation='none',
norm=matplotlib.colors.LogNorm(vmin=im1.get_clim()[0], vmax=im1.get_clim()[1]), cmap='inferno', extent=extent)
# Plot the phase of ee and nn
im2 = axs[0, 1].imshow(
np.where(flags[bls_to_plot[0]], np.nan, np.angle(data[bls_to_plot[0]])),
aspect='auto', cmap='twilight', interpolation='None', extent=extent,
vmin=-np.pi, vmax=np.pi
)
axs[1, 1].imshow(
np.where(flags[bls_to_plot[1]], np.nan, np.angle(data[bls_to_plot[1]])),
aspect='auto', cmap='twilight', interpolation='None', extent=extent,
vmin=-np.pi, vmax=np.pi
)
fig.colorbar(im1, ax=axs[:, 0], label='$|V_{ij}|$ (Jy)', location='top', pad=0.01)
fig.colorbar(im2, ax=axs[:, 1], label='Angle($V_{ij})$ (radians)', location='top', pad=0.01)
if IS_LST_BINNED:
# Plot the amplitude of ee and nn
im3 = axs[0, 2].imshow(np.where(flags[bls_to_plot[0]], np.nan, nsamples[bls_to_plot[0]]), aspect='auto', interpolation='none',
cmap='viridis', extent=extent)
axs[1, 2].imshow(np.where(flags[bls_to_plot[1]], np.nan, nsamples[bls_to_plot[1]]), aspect='auto', interpolation='none',
vmin=im3.get_clim()[0], vmax=im3.get_clim()[1], cmap='viridis', extent=extent)
fig.colorbar(im3, ax=axs[:, 2], label='Number of Samples', location='top', pad=0.01)
bls_for_labeling = [bls_to_plot[0][2]] * 3 + [bls_to_plot[1][2]] * 3
else:
bls_for_labeling = [bls_to_plot[0][2]] * 2 + [bls_to_plot[1][2]] * 2
for ax, pol in zip(axs.ravel(), bls_for_labeling):
ax.text(0.03, 0.97, f"{unit}-unit EW ({pol})",
horizontalalignment='left', verticalalignment='top',
color='black', transform=ax.transAxes, fontsize=12,
bbox=dict(facecolor='w', alpha=.7, boxstyle='round'))
for i in range(axs.shape[1]):
axs[1, i].set_xlabel('Frequency (MHz)')
for i in range(axs.shape[0]):
axs[i, 0].set_ylabel('LST (hours)')
plt.tight_layout()
Figure 1: Amplitude and Phase of Calibrated 1-unit EW Redundantly-Averaged Baseline
This figure includes Axes that are not compatible with tight_layout, so results might be incorrect.