by Josh Dillon, last updated March 29, 2023
This notebook runs calibration smoothing to the gains coming out of file_calibration notebook. It removes any flags founds on by that notebook and replaces them with flags generated from full_day_rfi and full_day_antenna_flagging. It also plots the results for a couple of antennas.
Here's a set of links to skip to particular figures and tables:
smooth_cal
¶smooth_cal
¶import time
tstart = time.time()
import os
os.environ['HDF5_USE_FILE_LOCKING'] = 'FALSE'
import h5py
import hdf5plugin # REQUIRED to have the compression plugins available
import numpy as np
import glob
import copy
import warnings
import matplotlib
import matplotlib.pyplot as plt
from hera_cal import io, utils, smooth_cal
from hera_qm.time_series_metrics import true_stretches
%matplotlib inline
from IPython.display import display, HTML
# get files
SUM_FILE = os.environ.get("SUM_FILE", None)
# SUM_FILE = '/users/jsdillon/lustre/H6C/abscal/2459853/zen.2459853.25518.sum.uvh5'
SUM_SUFFIX = os.environ.get("SUM_SUFFIX", 'sum.uvh5')
CAL_SUFFIX = os.environ.get("CAL_SUFFIX", 'sum.omni.calfits')
SMOOTH_CAL_SUFFIX = os.environ.get("SMOOTH_CAL_SUFFIX", 'sum.smooth.calfits')
ANT_FLAG_SUFFIX = os.environ.get("ANT_FLAG_SUFFIX", 'sum.antenna_flags.h5')
RFI_FLAG_SUFFIX = os.environ.get("RFI_FLAG_SUFFIX", 'sum.flag_waterfall.h5')
FREQ_SMOOTHING_SCALE = float(os.environ.get("FREQ_SMOOTHING_SCALE", 10.0)) # MHz
TIME_SMOOTHING_SCALE = float(os.environ.get("TIME_SMOOTHING_SCALE", 6e5)) # seconds
EIGENVAL_CUTOFF = float(os.environ.get("EIGENVAL_CUTOFF", 1e-12))
for setting in ['SUM_FILE', 'SUM_SUFFIX', 'CAL_SUFFIX', 'SMOOTH_CAL_SUFFIX', 'ANT_FLAG_SUFFIX',
'RFI_FLAG_SUFFIX', 'FREQ_SMOOTHING_SCALE', 'TIME_SMOOTHING_SCALE', 'EIGENVAL_CUTOFF']:
print(f'{setting} = {eval(setting)}')
SUM_FILE = /mnt/sn1/2460210/zen.2460210.52316.sum.uvh5 SUM_SUFFIX = sum.uvh5 CAL_SUFFIX = sum.omni.calfits SMOOTH_CAL_SUFFIX = sum.smooth.calfits ANT_FLAG_SUFFIX = sum.antenna_flags.h5 RFI_FLAG_SUFFIX = sum.flag_waterfall.h5 FREQ_SMOOTHING_SCALE = 10.0 TIME_SMOOTHING_SCALE = 600000.0 EIGENVAL_CUTOFF = 1e-12
sum_glob = '.'.join(SUM_FILE.split('.')[:-3]) + '.*.' + SUM_SUFFIX
cal_files_glob = sum_glob.replace(SUM_SUFFIX, CAL_SUFFIX)
cal_files = sorted(glob.glob(cal_files_glob))
print(f'Found {len(cal_files)} *.{CAL_SUFFIX} files starting with {cal_files[0]}.')
Found 175 *.sum.omni.calfits files starting with /mnt/sn1/2460210/zen.2460210.52316.sum.omni.calfits.
rfi_flag_files_glob = sum_glob.replace(SUM_SUFFIX, RFI_FLAG_SUFFIX)
rfi_flag_files = sorted(glob.glob(rfi_flag_files_glob))
print(f'Found {len(rfi_flag_files)} *.{RFI_FLAG_SUFFIX} files starting with {rfi_flag_files[0]}.')
Found 175 *.sum.flag_waterfall.h5 files starting with /mnt/sn1/2460210/zen.2460210.52316.sum.flag_waterfall.h5.
ant_flag_files_glob = sum_glob.replace(SUM_SUFFIX, ANT_FLAG_SUFFIX)
ant_flag_files = sorted(glob.glob(ant_flag_files_glob))
print(f'Found {len(ant_flag_files)} *.{ANT_FLAG_SUFFIX} files starting with {ant_flag_files[0]}.')
Found 175 *.sum.antenna_flags.h5 files starting with /mnt/sn1/2460210/zen.2460210.52316.sum.antenna_flags.h5.
cs = smooth_cal.CalibrationSmoother(cal_files, flag_file_list=(ant_flag_files + rfi_flag_files), ignore_calflags=True,
pick_refant=True, propagate_refant_flags=True, load_chisq=True, load_cspa=True)
for pol in cs.refant:
print(f'Reference antenna {cs.refant[pol][0]} selected for {pol}.')
invalid value encountered in multiply Mean of empty slice
Reference antenna 69 selected for Jee. Reference antenna 55 selected for Jnn.
# duplicate a small number of abscal gains for plotting
antnums = set([ant[0] for ant in cs.ants])
flags_per_antnum = [np.sum(cs.flag_grids[ant, 'Jnn']) + np.sum(cs.flag_grids[ant, 'Jee']) for ant in antnums]
refant_nums = [ant[0] for ant in cs.refant.values()]
candidate_ants = [ant for ant, nflags in zip(antnums, flags_per_antnum) if (ant not in refant_nums) and (nflags <= np.percentile(flags_per_antnum, 25))
and not np.all(cs.flag_grids[ant, 'Jee']) and not np.all(cs.flag_grids[ant, 'Jnn'])]
ants_to_plot = [func(candidate_ants) for func in (np.min, np.max)]
abscal_gains = {(ant, pol): np.array(cs.gain_grids[(ant, pol)]) for ant in ants_to_plot for pol in ['Jee', 'Jnn']}
cs.time_freq_2D_filter(freq_scale=FREQ_SMOOTHING_SCALE, time_scale=TIME_SMOOTHING_SCALE, eigenval_cutoff=EIGENVAL_CUTOFF,
method='DPSS', fit_method='lu_solve', fix_phase_flips=True, flag_phase_flip_ints=True)
WARNING:jax._src.lib.xla_bridge:No GPU/TPU found, falling back to CPU. (Set TF_CPP_MIN_LOG_LEVEL=0 and rerun for more info.)
lst_grid = utils.JD2LST(cs.time_grid) * 12 / np.pi
lst_grid[lst_grid > lst_grid[-1]] -= 24
def amplitude_plot(ant_to_plot):
with warnings.catch_warnings():
warnings.simplefilter("ignore")
# Pick vmax to not saturate 90% of the abscal gains
vmax = np.max([np.percentile(np.abs(cs.gain_grids[ant_to_plot, pol][~cs.flag_grids[ant_to_plot, pol]]), 99) for pol in ['Jee', 'Jnn']])
display(HTML(f'<h2>Antenna {ant_to_plot} Amplitude Waterfalls</h2>'))
# Plot abscal gain amplitude waterfalls for a single antenna
fig, axes = plt.subplots(4, 2, figsize=(14,14), gridspec_kw={'height_ratios': [1, 1, .4, .4]})
for ax, pol in zip(axes[0], ['Jee', 'Jnn']):
ant = (ant_to_plot, pol)
extent=[cs.freqs[0]/1e6, cs.freqs[-1]/1e6, lst_grid[-1], lst_grid[0]]
im = ax.imshow(np.where(cs.flag_grids[ant], np.nan, np.abs(cs.gain_grids[ant])), aspect='auto', cmap='inferno',
interpolation='nearest', vmin=0, vmax=vmax, extent=extent)
ax.set_title(f'Smoothcal Gain Amplitude of Antenna {ant[0]}: {pol[-1]}-polarized' )
ax.set_xlabel('Frequency (MHz)')
ax.set_ylabel('LST (Hours)')
ax.set_xlim([cs.freqs[0]/1e6, cs.freqs[-1]/1e6])
ax.set_yticklabels(ax.get_yticks() % 24)
plt.colorbar(im, ax=ax, orientation='horizontal', pad=.15)
# Now flagged plot abscal waterfall
for ax, pol in zip(axes[1], ['Jee', 'Jnn']):
ant = (ant_to_plot, pol)
extent=[cs.freqs[0]/1e6, cs.freqs[-1]/1e6, lst_grid[-1], lst_grid[0]]
im = ax.imshow(np.where(cs.flag_grids[ant], np.nan, np.abs(abscal_gains[ant])), aspect='auto', cmap='inferno',
interpolation='nearest', vmin=0, vmax=vmax, extent=extent)
ax.set_title(f'Abscal Gain Amplitude of Antenna {ant[0]}: {pol[-1]}-polarized' )
ax.set_xlabel('Frequency (MHz)')
ax.set_ylabel('LST (Hours)')
ax.set_xlim([cs.freqs[0]/1e6, cs.freqs[-1]/1e6])
ax.set_yticklabels(ax.get_yticks() % 24)
plt.colorbar(im, ax=ax, orientation='horizontal', pad=.15)
# Now plot mean gain spectra
for ax, pol in zip(axes[2], ['Jee', 'Jnn']):
ant = (ant_to_plot, pol)
nflags_spectrum = np.sum(cs.flag_grids[ant], axis=0)
to_plot = nflags_spectrum <= np.percentile(nflags_spectrum, 75)
ax.plot(cs.freqs[to_plot] / 1e6, np.nanmean(np.where(cs.flag_grids[ant], np.nan, np.abs(abscal_gains[ant])), axis=0)[to_plot], 'r.', label='Abscal')
ax.plot(cs.freqs[to_plot] / 1e6, np.nanmean(np.where(cs.flag_grids[ant], np.nan, np.abs(cs.gain_grids[ant])), axis=0)[to_plot], 'k.', ms=2, label='Smoothed')
ax.set_ylim([0, vmax])
ax.set_xlim([cs.freqs[0]/1e6, cs.freqs[-1]/1e6])
ax.set_xlabel('Frequency (MHz)')
ax.set_ylabel('|g| (unitless)')
ax.set_title(f'Mean Infrequently-Flagged Gain Amplitude of Antenna {ant[0]}: {pol[-1]}-polarized')
ax.legend(loc='upper left')
# Now plot mean gain time series
for ax, pol in zip(axes[3], ['Jee', 'Jnn']):
ant = (ant_to_plot, pol)
nflags_series = np.sum(cs.flag_grids[ant], axis=1)
to_plot = nflags_series <= np.percentile(nflags_series, 75)
ax.plot(lst_grid[to_plot], np.nanmean(np.where(cs.flag_grids[ant], np.nan, np.abs(abscal_gains[ant])), axis=1)[to_plot], 'r.', label='Abscal')
ax.plot(lst_grid[to_plot], np.nanmean(np.where(cs.flag_grids[ant], np.nan, np.abs(cs.gain_grids[ant])), axis=1)[to_plot], 'k.', ms=2, label='Smoothed')
ax.set_ylim([0, vmax])
ax.set_xlabel('LST (hours)')
ax.set_ylabel('|g| (unitless)')
ax.set_title(f'Mean Infrequently-Flagged Gain Amplitude of Antenna {ant[0]}: {pol[-1]}-polarized')
ax.set_xticklabels(ax.get_xticks() % 24)
ax.legend(loc='upper left')
plt.tight_layout()
plt.show()
def phase_plot(ant_to_plot):
with warnings.catch_warnings():
warnings.simplefilter("ignore")
display(HTML(f'<h2>Antenna {ant_to_plot} Phase Waterfalls</h2>'))
fig, axes = plt.subplots(4, 2, figsize=(14,14), gridspec_kw={'height_ratios': [1, 1, .4, .4]})
# Plot phase waterfalls for a single antenna
for ax, pol in zip(axes[0], ['Jee', 'Jnn']):
ant = (ant_to_plot, pol)
extent=[cs.freqs[0]/1e6, cs.freqs[-1]/1e6, lst_grid[-1], lst_grid[0]]
im = ax.imshow(np.where(cs.flag_grids[ant], np.nan, np.angle(cs.gain_grids[ant])), aspect='auto', cmap='inferno',
interpolation='nearest', vmin=-np.pi, vmax=np.pi, extent=extent)
ax.set_title(f'Smoothcal Gain Phase of Ant {ant[0]} / Ant {cs.refant[pol][0]}: {pol[-1]}-polarized')
ax.set_xlabel('Frequency (MHz)')
ax.set_ylabel('LST (Hours)')
ax.set_xlim([cs.freqs[0]/1e6, cs.freqs[-1]/1e6])
ax.set_yticklabels(ax.get_yticks() % 24)
plt.colorbar(im, ax=ax, orientation='horizontal', pad=.15)
# Now plot abscal phase waterfall
for ax, pol in zip(axes[1], ['Jee', 'Jnn']):
ant = (ant_to_plot, pol)
extent=[cs.freqs[0]/1e6, cs.freqs[-1]/1e6, lst_grid[-1], lst_grid[0]]
im = ax.imshow(np.where(cs.flag_grids[ant], np.nan, np.angle(abscal_gains[ant])), aspect='auto', cmap='inferno',
interpolation='nearest', vmin=-np.pi, vmax=np.pi, extent=extent)
ax.set_title(f'Abscal Gain Phase of Ant {ant[0]} / Ant {cs.refant[pol][0]}: {pol[-1]}-polarized')
ax.set_xlabel('Frequency (MHz)')
ax.set_ylabel('LST (Hours)')
ax.set_xlim([cs.freqs[0]/1e6, cs.freqs[-1]/1e6])
ax.set_yticklabels(ax.get_yticks() % 24)
plt.colorbar(im, ax=ax, orientation='horizontal', pad=.15)
# Now plot median gain spectra
for ax, pol in zip(axes[2], ['Jee', 'Jnn']):
ant = (ant_to_plot, pol)
nflags_spectrum = np.sum(cs.flag_grids[ant], axis=0)
to_plot = nflags_spectrum <= np.percentile(nflags_spectrum, 75)
ax.plot(cs.freqs[to_plot] / 1e6, np.nanmedian(np.where(cs.flag_grids[ant], np.nan, np.angle(abscal_gains[ant])), axis=0)[to_plot], 'r.', label='Abscal')
ax.plot(cs.freqs[to_plot] / 1e6, np.nanmedian(np.where(cs.flag_grids[ant], np.nan, np.angle(cs.gain_grids[ant])), axis=0)[to_plot], 'k.', ms=2, label='Smoothed')
ax.set_ylim([-np.pi, np.pi])
ax.set_xlim([cs.freqs[0]/1e6, cs.freqs[-1]/1e6])
ax.set_xlabel('Frequency (MHz)')
ax.set_ylabel(f'Phase of g$_{{{ant[0]}}}$ / g$_{{{cs.refant[pol][0]}}}$')
ax.set_title(f'Median Infrequently-Flagged Gain Phase of Ant {ant[0]} / Ant {cs.refant[pol][0]}: {pol[-1]}-polarized')
ax.legend(loc='upper left')
# # Now plot median gain time series
for ax, pol in zip(axes[3], ['Jee', 'Jnn']):
ant = (ant_to_plot, pol)
nflags_series = np.sum(cs.flag_grids[ant], axis=1)
to_plot = nflags_series <= np.percentile(nflags_series, 75)
ax.plot(lst_grid[to_plot], np.nanmean(np.where(cs.flag_grids[ant], np.nan, np.angle(abscal_gains[ant])), axis=1)[to_plot], 'r.', label='Abscal')
ax.plot(lst_grid[to_plot], np.nanmean(np.where(cs.flag_grids[ant], np.nan, np.angle(cs.gain_grids[ant])), axis=1)[to_plot], 'k.', ms=2, label='Smoothed')
ax.set_ylim([-np.pi, np.pi])
ax.set_xlabel('LST (hours)')
ax.set_ylabel(f'Phase of g$_{{{ant[0]}}}$ / g$_{{{cs.refant[pol][0]}}}$')
ax.set_title(f'Mean Infrequently-Flagged Gain Phase of Ant {ant[0]} / Ant {cs.refant[pol][0]}: {pol[-1]}-polarized')
ax.set_xticklabels(ax.get_xticks() % 24)
ax.legend(loc='upper left')
plt.tight_layout()
plt.show()
smooth_cal
¶Here we plot abscal
and smooth_cal
gain amplitudes for both of the sample antennas. We also show means across time/frequency, excluding frequencies/times that are frequently flagged.
for ant_to_plot in ants_to_plot:
amplitude_plot(ant_to_plot)
smooth_cal
¶Here we plot abscal
and smooth_cal
phases relative to each polarization's reference antenna for both of the sample antennas. We also show medians across time/frequency, excluding frequencies/times that are frequently flagged.
for ant_to_plot in ants_to_plot:
phase_plot(ant_to_plot)
def chisq_plot():
fig, axes = plt.subplots(1, 2, figsize=(14, 10), sharex=True, sharey=True)
extent = [cs.freqs[0]/1e6, cs.freqs[-1]/1e6, lst_grid[-1], lst_grid[0]]
for ax, pol in zip(axes, ['Jee', 'Jnn']):
im = ax.imshow(np.where(cs.flag_grids[cs.refant[pol]], np.nan, cs.chisq_grids[pol]), vmin=1, vmax=5,
aspect='auto', cmap='turbo', interpolation='none', extent=extent)
ax.set_yticklabels(ax.get_yticks() % 24)
ax.set_title(f'{pol[1:]}-Polarized $\\chi^2$ / DoF')
ax.set_xlabel('Frequency (MHz)')
axes[0].set_ylabel('LST (hours)')
plt.tight_layout()
fig.colorbar(im, ax=axes, pad=.07, label='$\\chi^2$ / DoF', orientation='horizontal', extend='both', aspect=50)
Here we plot $\chi^2$ per degree of freedom from redundant-baseline calibration for both polarizations separately. While this plot is a little out of place, as it was not produced by this notebook, it is a convenient place where all the necessary components are readily available. If the array were perfectly redundant and any non-redundancies in the calibrated visibilities were explicable by thermal noise alone, this waterfall should be all 1.
chisq_plot()
FixedFormatter should only be used together with FixedLocator
avg_cspa_vs_time = {ant: np.nanmean(np.where(cs.flag_grids[ant], np.nan, cs.cspa_grids[ant]), axis=1) for ant in cs.ants}
avg_cspa_vs_freq = {ant: np.nanmean(np.where(cs.flag_grids[ant], np.nan, cs.cspa_grids[ant]), axis=0) for ant in cs.ants}
Mean of empty slice Mean of empty slice
def cspa_vs_time_plot():
fig, axes = plt.subplots(2, 1, figsize=(14, 8), sharex=True, sharey=True, gridspec_kw={'hspace': 0})
for ax, pol in zip(axes, ['Jee', 'Jnn']):
detail_cutoff = np.percentile([np.nanmean(m) for ant, m in avg_cspa_vs_time.items()
if ant[1] == pol and np.isfinite(np.nanmean(m))], 95)
for ant in avg_cspa_vs_time:
if ant[1] == pol and not np.all(cs.flag_grids[ant]):
if np.nanmean(avg_cspa_vs_time[ant]) > detail_cutoff:
ax.plot(lst_grid, avg_cspa_vs_time[ant], label=ant, zorder=100)
else:
ax.plot(lst_grid, avg_cspa_vs_time[ant], c='grey', alpha=.2, lw=.5)
ax.legend(title=f'{pol[1:]}-Polarized', ncol=2)
ax.set_ylabel('Mean Unflagged $\\chi^2$ per Antenna')
ax.set_xlabel('LST (hours)')
ax.set_xticklabels(ax.get_xticks() % 24)
plt.ylim([1, 5.4])
plt.tight_layout()
def cspa_vs_freq_plot():
fig, axes = plt.subplots(2, 1, figsize=(14, 6), sharex=True, sharey=True, gridspec_kw={'hspace': 0})
for ax, pol in zip(axes, ['Jee', 'Jnn']):
detail_cutoff = np.percentile([np.nanmean(m) for ant, m in avg_cspa_vs_freq.items()
if ant[1] == pol and np.isfinite(np.nanmean(m))], 95)
for ant in avg_cspa_vs_freq:
if ant[1] == pol and not np.all(cs.flag_grids[ant]):
if np.nanmean(avg_cspa_vs_freq[ant]) > detail_cutoff:
ax.plot(cs.freqs / 1e6, avg_cspa_vs_freq[ant], label=ant, zorder=100)
else:
ax.plot(cs.freqs / 1e6, avg_cspa_vs_freq[ant], c='grey', alpha=.2, lw=.5)
ax.legend(title=f'{pol[1:]}-Polarized', ncol=2)
ax.set_ylabel('Mean Unflagged $\\chi^2$ per Antenna')
ax.set_xlabel('Frequency (MHz)')
plt.ylim([1, 5.4])
plt.tight_layout()
Here we plot $\chi^2$ per antenna from redundant-baseline calibration, separating polarizations and averaging the unflagged pixels in the waterfalls over frequency or time. The worst 5% of antennas are shown in color and highlighted in the legends, the rest are shown in grey.
cspa_vs_time_plot()
cspa_vs_freq_plot()
Mean of empty slice FixedFormatter should only be used together with FixedLocator
add_to_history = 'Produced by calibration_smoothing notebook with the following environment:\n' + '=' * 65 + '\n' + os.popen('conda env export').read() + '=' * 65
cs.write_smoothed_cal(output_replace=(CAL_SUFFIX, SMOOTH_CAL_SUFFIX), add_to_history=add_to_history, clobber=True)
invalid value encountered in multiply overflow encountered in absolute overflow encountered in absolute invalid value encountered in divide Mean of empty slice overflow encountered in multiply overflow encountered in cast
for repo in ['hera_cal', 'hera_qm', 'hera_filters', 'hera_notebook_templates', 'pyuvdata']:
exec(f'from {repo} import __version__')
print(f'{repo}: {__version__}')
hera_cal: 3.4.0 hera_qm: 2.1.3.dev4+g50af006 hera_filters: 0.1.0 hera_notebook_templates: 0.1.dev486+gfb8560a pyuvdata: 2.4.0
print(f'Finished execution in {(time.time() - tstart) / 60:.2f} minutes.')
Finished execution in 3.30 minutes.