import time
tstart = time.time()

import os
os.environ['HDF5_USE_FILE_LOCKING'] = 'FALSE'
import h5py
import hdf5plugin  # REQUIRED to have the compression plugins available
import numpy as np
import pandas as pd
import glob
import os
import matplotlib.pyplot as plt
import matplotlib
import copy
from scipy.ndimage import convolve
from hera_cal import io, utils
from hera_cal.smooth_cal import dpss_filters, solve_2D_DPSS
from hera_qm import ant_class, xrfi, metrics_io
from hera_qm.time_series_metrics import true_stretches, impose_max_flag_gap, metric_convolution_flagging
from hera_filters import dspec
import warnings

from IPython.display import display, HTML
%matplotlib inline
display(HTML("<style>.container { width:100% !important; }</style>"))
_ = np.seterr(all='ignore')  # get rid of red warnings
%config InlineBackend.figure_format = 'retina'

# get filenames
AUTO_FILE = os.environ.get("AUTO_FILE", None)
# AUTO_FILE = '/mnt/sn1/data2/2460458/zen.2460458.16881.sum.uvh5'

SUM_AUTOS_SUFFIX = os.environ.get("SUM_AUTOS_SUFFIX", 'sum.autos.uvh5')
DIFF_AUTOS_SUFFIX = os.environ.get("DIFF_AUTOS_SUFFIX", 'diff.autos.uvh5')
OUT_YAML_SUFFIX = os.environ.get("OUT_YAML_SUFFIX", '_apriori_flags.yaml')
OUT_YAML_DIR = os.environ.get("OUT_YAML_DIR", None)
# OUT_YAML_DIR = '/lustre/aoc/projects/hera/jsdillon/H6C/'

if OUT_YAML_DIR is None:
    OUT_YAML_DIR = os.path.dirname(AUTO_FILE)

auto_sums_glob = '.'.join(AUTO_FILE.split('.')[:-4]) + '.*.' + SUM_AUTOS_SUFFIX
auto_diffs_glob = auto_sums_glob.replace(SUM_AUTOS_SUFFIX, DIFF_AUTOS_SUFFIX)
out_yaml_file = os.path.join(OUT_YAML_DIR, AUTO_FILE.split('.')[-5] + OUT_YAML_SUFFIX)

auto_sums = sorted(glob.glob(auto_sums_glob))
print(f'Found {len(auto_sums)} *.{SUM_AUTOS_SUFFIX} files starting with {auto_sums[0]}.')

auto_diffs = sorted(glob.glob(auto_diffs_glob))
print(f'Found {len(auto_diffs)} *.{DIFF_AUTOS_SUFFIX} files starting with {auto_diffs[0]}.')

Found 1572 *.sum.autos.uvh5 files starting with /mnt/sn1/data2/2460542/zen.2460542.16901.sum.autos.uvh5.
Found 1572 *.diff.autos.uvh5 files starting with /mnt/sn1/data2/2460542/zen.2460542.16901.diff.autos.uvh5.

# bounds on zeros in spectra
good_zeros_per_eo_spectrum = (0, int(os.environ.get("MAX_ZEROS_PER_EO_SPEC_GOOD", 2)))
suspect_zeros_per_eo_spectrum = (0, int(os.environ.get("MAX_ZEROS_PER_EO_SPEC_SUSPECT", 8)))

# bounds on autocorrelation power
auto_power_good = (float(os.environ.get("AUTO_POWER_GOOD_LOW", 5)), float(os.environ.get("AUTO_POWER_GOOD_HIGH", 30)))
auto_power_suspect = (float(os.environ.get("AUTO_POWER_SUSPECT_LOW", 1)), float(os.environ.get("AUTO_POWER_SUSPECT_HIGH", 60)))

# bounds on autocorrelation slope
auto_slope_good = (float(os.environ.get("AUTO_SLOPE_GOOD_LOW", -0.4)), float(os.environ.get("AUTO_SLOPE_GOOD_HIGH", 0.4)))
auto_slope_suspect = (float(os.environ.get("AUTO_SLOPE_SUSPECT_LOW", -0.6)), float(os.environ.get("AUTO_SLOPE_SUSPECT_HIGH", 0.6)))

# bounds on autocorrelation RFI
auto_rfi_good = (0, float(os.environ.get("AUTO_RFI_GOOD", 1.5)))
auto_rfi_suspect = (0, float(os.environ.get("AUTO_RFI_SUSPECT", 2)))

# bounds on autocorrelation shape
auto_shape_good = (0, float(os.environ.get("AUTO_SHAPE_GOOD", 0.1)))
auto_shape_suspect = (0, float(os.environ.get("AUTO_SHAPE_SUSPECT", 0.2)))

# parse RFI settings for antenna flagging
RFI_DPSS_HALFWIDTH = float(os.environ.get("RFI_DPSS_HALFWIDTH", 300e-9)) # in s
RFI_NSIG = float(os.environ.get("RFI_NSIG", 6))

# parse settings for identifying mislabeled data by X-engine
BAD_XENGINE_ZCUT = float(os.environ.get("BAD_XENGINE_ZCUT", 10))

# DPSS settings for full-day filtering
FREQ_FILTER_SCALE = float(os.environ.get("FREQ_FILTER_SCALE", 5.0)) # in MHz
TIME_FILTER_SCALE = float(os.environ.get("TIME_FILTER_SCALE", 450.0)) # in s
EIGENVAL_CUTOFF = float(os.environ.get("EIGENVAL_CUTOFF", 1e-12))

# A priori flag settings
FM_LOW_FREQ = float(os.environ.get("FM_LOW_FREQ", 87.5)) # in MHz
FM_HIGH_FREQ = float(os.environ.get("FM_HIGH_FREQ", 108.0)) # in MHz
FM_freq_range = [FM_LOW_FREQ * 1e6, FM_HIGH_FREQ * 1e6]
MAX_SOLAR_ALT = float(os.environ.get("MAX_SOLAR_ALT", 0.0)) # in degrees
SMOOTHED_ABS_Z_THRESH = float(os.environ.get("SMOOTHED_ABS_Z_THRESH", 10))
WHOLE_DAY_FLAG_THRESH = float(os.environ.get("WHOLE_DAY_FLAG_THRESH", 0.5))

for setting in ['good_zeros_per_eo_spectrum', 'suspect_zeros_per_eo_spectrum', 'auto_power_good', 'auto_power_suspect', 
                'auto_slope_good', 'auto_slope_suspect', 'auto_rfi_good', 'auto_rfi_suspect',
                'auto_shape_good', 'auto_shape_suspect', 'BAD_XENGINE_ZCUT', 'RFI_DPSS_HALFWIDTH', 'RFI_NSIG',
                'FREQ_FILTER_SCALE', 'TIME_FILTER_SCALE', 'EIGENVAL_CUTOFF', 'FM_freq_range',
                'MAX_SOLAR_ALT', 'SMOOTHED_ABS_Z_THRESH', 'WHOLE_DAY_FLAG_THRESH']:
        print(f'{setting} = {eval(setting)}')

good_zeros_per_eo_spectrum = (0, 2)
suspect_zeros_per_eo_spectrum = (0, 8)
auto_power_good = (5.0, 30.0)
auto_power_suspect = (1.0, 60.0)
auto_slope_good = (-0.4, 0.4)
auto_slope_suspect = (-0.6, 0.6)
auto_rfi_good = (0, 1.5)
auto_rfi_suspect = (0, 2.0)
auto_shape_good = (0, 0.1)
auto_shape_suspect = (0, 0.2)
BAD_XENGINE_ZCUT = 10.0
RFI_DPSS_HALFWIDTH = 3e-07
RFI_NSIG = 4.0
FREQ_FILTER_SCALE = 5.0
TIME_FILTER_SCALE = 450.0
EIGENVAL_CUTOFF = 1e-12
FM_freq_range = [87500000.0, 108000000.0]
MAX_SOLAR_ALT = 0.0
SMOOTHED_ABS_Z_THRESH = 10.0
WHOLE_DAY_FLAG_THRESH = 0.5

cache = {}

def auto_bl_zscores(data, flag_array, prior_class=None, cache={}):
    '''This function computes z-score arrays for each delay-filtered autocorrelation, normalized by the expected noise. 
    Flagged times/channels for the whole array are given 0 weight in filtering and are np.nan in the z-score.'''
    zscores = {}
    int_time = 24 * 3600 * np.median(np.diff(data.times))
    chan_res = np.median(np.diff(data.freqs))
    int_count = int(int_time * chan_res)    
    for bl in data:
        if utils.split_bl(bl)[0] != utils.split_bl(bl)[1]:
            continue
        if prior_class is not None:
            if (prior_class[utils.split_bl(bl)[0]] == 'bad'):
                continue
        wgts = np.array(np.logical_not(flag_array), dtype=np.float64)
        model, _, _ = dspec.fourier_filter(data.freqs, data[bl], wgts, filter_centers=[0], filter_half_widths=[RFI_DPSS_HALFWIDTH], mode='dpss_solve',
                                            suppression_factors=[1e-9], eigenval_cutoff=[1e-9], cache=cache)
        res = data[bl] - model
        sigma = np.abs(model) / np.sqrt(int_count / 2)
        zscores[bl] = res / sigma    
        zscores[bl][flag_array] = np.nan

    return zscores

def rfi_from_avg_autos(data, auto_bls_to_use, prior_flags=None, nsig=RFI_NSIG):
    '''Average together all baselines in auto_bls_to_use, then find an RFI mask by looking for outliers after DPSS filtering.'''
    
    # Compute int_count for all unflagged autocorrelations averaged together
    int_time = 24 * 3600 * np.median(np.diff(data.times))
    chan_res = np.median(np.diff(data.freqs))
    int_count = int(int_time * chan_res) * len(auto_bls_to_use)
    avg_auto = {(-1, -1, 'ee'): np.mean([data[bl] for bl in auto_bls_to_use], axis=0)}
    
    # Flag RFI first with channel differences and then with DPSS
    antenna_flags, _ = xrfi.flag_autos(avg_auto, int_count=int_count, nsig=(RFI_NSIG * 5))
    if prior_flags is not None:
        antenna_flags[(-1, -1, 'ee')] = prior_flags
    _, rfi_flags = xrfi.flag_autos(avg_auto, int_count=int_count, flag_method='dpss_flagger',
                                   flags=antenna_flags, freqs=data.freqs, filter_centers=[0],
                                   filter_half_widths=[RFI_DPSS_HALFWIDTH], eigenval_cutoff=[1e-9], nsig=nsig)

    return rfi_flags

def classify_autos_and_preliminary_rfi(auto_sum_file, auto_diff_file):
    
    hd_sum = io.HERADataFastReader(auto_sum_file)
    sum_autos, _, _ = hd_sum.read(read_flags=False, read_nsamples=False)
    hd_diff = io.HERADataFastReader(auto_diff_file)
    diff_autos, _, _ = hd_diff.read(read_flags=False, read_nsamples=False, dtype=np.complex64, fix_autos_func=np.real)
    ants = sorted(set([ant for bl in hd_sum.bls for ant in utils.split_bl(bl)]))
    
    # check for zeros in the evens or odds
    zeros_class = ant_class.even_odd_zeros_checker(sum_autos, diff_autos, good=good_zeros_per_eo_spectrum, suspect=suspect_zeros_per_eo_spectrum)

    # check for problems in auto power or slope
    auto_power_class = ant_class.auto_power_checker(sum_autos, good=auto_power_good, suspect=auto_power_suspect)
    auto_slope_class = ant_class.auto_slope_checker(sum_autos, good=auto_slope_good, suspect=auto_slope_suspect, edge_cut=100, filt_size=17)   
    
    overall_class = zeros_class + auto_power_class + auto_slope_class
    if (len(overall_class.good_ants) + len(overall_class.suspect_ants)) == 0:
        return overall_class, np.ones((len(sum_autos.times), len(sum_autos.freqs)), dtype=bool)
    
    # find initial set of flags
    antenna_flags, array_flags = xrfi.flag_autos(sum_autos, flag_method="channel_diff_flagger", nsig=RFI_NSIG * 5, 
                                                 antenna_class=overall_class, flag_broadcast_thresh=.5)
    for key in antenna_flags:
        antenna_flags[key] = array_flags
    _, array_flags = xrfi.flag_autos(sum_autos, freqs=sum_autos.freqs, flag_method="dpss_flagger",
                                     nsig=RFI_NSIG, antenna_class=overall_class,
                                     filter_centers=[0], filter_half_widths=[RFI_DPSS_HALFWIDTH],
                                     eigenval_cutoff=[1e-9], flags=antenna_flags, mode='dpss_matrix', 
                                     cache=cache, flag_broadcast_thresh=.5)        

    # check for non-noiselike x-engine diffs
    xengine_diff_class = ant_class.non_noiselike_diff_by_xengine_checker(sum_autos, diff_autos, flag_waterfall=array_flags, 
                                                                 antenna_class=overall_class, 
                                                                 xengine_chans=96, bad_xengine_zcut=BAD_XENGINE_ZCUT)

    # update overall_class and return if all antennas are bad
    overall_class += xengine_diff_class
    if (len(overall_class.good_ants) + len(overall_class.suspect_ants)) == 0:
        return overall_class, np.ones((len(sum_autos.times), len(sum_autos.freqs)), dtype=bool)
    
    
    # Iteratively develop RFI mask, excess RFI classification, and autocorrelation shape classification
    stage = 1
    rfi_flags = np.array(array_flags)
    prior_end_states = set()
    while True:
        # compute DPSS-filtered z-scores with current array-wide RFI mask
        zscores = auto_bl_zscores(sum_autos, rfi_flags, cache=cache,
                                  prior_class=(auto_power_class + auto_slope_class + zeros_class + xengine_diff_class))
        rms = {bl: np.nanmean(zscores[bl]**2)**.5 if np.any(np.isfinite(zscores[bl])) else np.inf for bl in zscores}

        # figure out which autos to use for finding new set of flags
        candidate_autos = [bl for bl in sum_autos if overall_class[utils.split_bl(bl)[0]] != 'bad']
        if stage == 1:
            # use best half of the unflagged antennas
            med_rms = np.nanmedian([rms[bl] for bl in candidate_autos])
            autos_to_use = [bl for bl in candidate_autos if rms[bl] <= med_rms]
        elif stage == 2:
            # use all unflagged antennas which are auto RFI good, or the best half, whichever is larger
            med_rms = np.nanmedian([rms[bl] for bl in candidate_autos])
            best_half_autos = [bl for bl in candidate_autos if rms[bl] <= med_rms]
            good_autos = [bl for bl in candidate_autos if (overall_class[utils.split_bl(bl)[0]] != 'bad')
                          and (auto_rfi_class[utils.split_bl(bl)[0]] == 'good')]
            autos_to_use = (best_half_autos if len(best_half_autos) > len(good_autos) else good_autos)
        elif stage == 3:
            # use all unflagged antennas which are auto RFI good or suspect
            autos_to_use = [bl for bl in candidate_autos if (overall_class[utils.split_bl(bl)[0]] != 'bad')]

        # compute new RFI flags
        rfi_flags = rfi_from_avg_autos(sum_autos, autos_to_use)

        # perform auto shape and RFI classification
        overall_class = auto_power_class + auto_slope_class + zeros_class + xengine_diff_class
        auto_rfi_class = ant_class.antenna_bounds_checker(rms, good=auto_rfi_good, suspect=auto_rfi_suspect, bad=(0, np.inf))
        overall_class += auto_rfi_class
        auto_shape_class = ant_class.auto_shape_checker(sum_autos, good=auto_shape_good, suspect=auto_shape_suspect,
                                                        flag_spectrum=np.sum(rfi_flags, axis=0).astype(bool), 
                                                        antenna_class=overall_class)
        overall_class += auto_shape_class

        # check for convergence by seeing whether we've previously gotten to this number of flagged antennas and channels
        if stage == 3:
            if (len(overall_class.bad_ants), np.sum(rfi_flags)) in prior_end_states:
                break
            prior_end_states.add((len(overall_class.bad_ants), np.sum(rfi_flags)))
        else:
            stage += 1    
    
    # return all flagged if all antennnas are bad, otherwise return overall class and rfi_flags
    if (len(overall_class.good_ants) + len(overall_class.suspect_ants)) == 0:
        return overall_class, np.ones((len(sum_autos.times), len(sum_autos.freqs)), dtype=bool)
    else:
        return overall_class, rfi_flags

classifications = {}
preliminary_rfi_flags = {}
with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    for auto_sum_file, auto_diff_file in list(zip(auto_sums, auto_diffs)):
        classifications[auto_sum_file], preliminary_rfi_flags[auto_sum_file] = classify_autos_and_preliminary_rfi(auto_sum_file, auto_diff_file)

all_ants = set([ant for auto_sum_file in classifications for ant in classifications[auto_sum_file]])
ant_flag_fracs = {ant: 0 for ant in all_ants}
for classification in classifications.values():
    for ant in classification.bad_ants:
        ant_flag_fracs[ant] += 1
ant_flag_fracs = {ant: ant_flag_fracs[ant] / len(classifications) for ant in all_ants}

def flag_frac_array_plot():
    fig, axes = plt.subplots(1, 2, figsize=(14, 8), dpi=100, gridspec_kw={'width_ratios': [2, 1]})

    def flag_frac_panel(ax, antnums, radius=7, legend=False):

        ang_dict = {'Jee': (225, 405), 'Jnn': (45, 225)}
        hd_sum = io.HERADataFastReader(auto_sums[-1])
        xpos = np.array([hd_sum.antpos[ant[0]][0] for ant in all_ants if ant[0] in antnums])
        ypos = np.array([hd_sum.antpos[ant[0]][1] for ant in all_ants if ant[0] in antnums])
        scatter = ax.scatter(xpos, ypos, c='w', s=0)
        for ant in all_ants:
            antnum, pol = ant
            if antnum in antnums:
                ax.add_artist(matplotlib.patches.Wedge(tuple(hd_sum.antpos[antnum][0:2]), radius, *ang_dict[pol], color='grey'))
                flag_frac = ant_flag_fracs[ant]
                if flag_frac > .05:
                    ax.add_artist(matplotlib.patches.Wedge(tuple(hd_sum.antpos[antnum][0:2]), radius * np.sqrt(flag_frac), *ang_dict[pol], color='r'))
                ax.text(hd_sum.antpos[antnum][0], hd_sum.antpos[antnum][1], str(antnum), color='w',  va='center', ha='center', zorder=100)

        ax.axis('equal')
        ax.set_xlim([np.min(xpos) - radius * 2, np.max(xpos) + radius * 2])
        ax.set_ylim([np.min(ypos) - radius * 2, np.max(ypos) + radius * 2])
        ax.set_xlabel("East-West Position (meters)", size=12)
        ax.set_ylabel("North-South Position (meters)", size=12)

        if legend:
            legend_objs = []
            legend_labels = []

            legend_objs.append(matplotlib.lines.Line2D([0], [0], marker='o', color='w', markeredgecolor='grey', markerfacecolor='grey', markersize=15))
            unflagged_nights = lambda pol: np.sum([1 - ant_flag_fracs[ant] for ant in all_ants if ant[-1] == pol])
            legend_labels.append((' \u2571\n').join([f'{unflagged_nights(pol):.1f} unflagged {pol[-1]}-polarized\nantenna-nights.' for pol in ['Jee', 'Jnn']]))

            legend_objs.append(matplotlib.lines.Line2D([0], [0], marker='o', color='w', markeredgecolor='red', markerfacecolor='red', markersize=15))
            unflagged_nights = lambda pol: np.sum([ant_flag_fracs[ant] for ant in all_ants if ant[-1] == pol])
            legend_labels.append((' \u2571\n').join([f'{unflagged_nights(pol):.1f} flagged {pol[-1]}-polarized\nantenna-nights.' for pol in ['Jee', 'Jnn']]))        
            ax.legend(legend_objs, legend_labels, ncol=1, fontsize=12)

    flag_frac_panel(axes[0], sorted(set([ant[0] for ant in all_ants if ant[0] < 320])), radius=7)
    flag_frac_panel(axes[1], sorted(set([ant[0] for ant in all_ants if ant[0] >= 320])), radius=50, legend=True)
    plt.tight_layout()

flag_frac_array_plot()

min_flag_frac = np.min(list(ant_flag_fracs.values()))
least_flagged_ants = sorted([ant for ant in all_ants if ant_flag_fracs[ant] == min_flag_frac])
least_flagged_autos = [utils.join_bl(ant, ant) for ant in least_flagged_ants]

avg_autos = {}
times = []
for auto_sum_file in auto_sums:
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
    hd_sum = io.HERADataFastReader(auto_sum_file)
    sum_autos, _, _ = hd_sum.read(bls=least_flagged_autos, read_flags=False, read_nsamples=False)
    avg_autos[auto_sum_file] = np.mean([sum_autos[bl] for bl in sum_autos], axis=0)
    times.extend(hd_sum.times)
times = np.array(times)
freqs = hd_sum.freqs

flags = np.vstack([flag for flag in preliminary_rfi_flags.values()])
avg_auto = np.vstack([auto for auto in avg_autos.values()])

def flag_FM(flags, freqs, freq_range=[87.5e6, 108e6]):
    '''Apply flags to all frequencies within freq_range (in Hz).'''
    flags[:, np.logical_and(freqs >= freq_range[0], freqs <= freq_range[1])] = True 
    
def flag_sun(flags, times, max_solar_alt=0):
    '''Apply flags to all times where the solar altitude is greater than max_solar_alt (in degrees).'''
    solar_altitudes_degrees = utils.get_sun_alt(times)
    flags[solar_altitudes_degrees >= max_solar_alt, :] = True

flag_FM(flags, freqs, freq_range=FM_freq_range)
solar_flags = np.zeros_like(flags)
flag_sun(solar_flags, times, max_solar_alt=MAX_SOLAR_ALT)
flags |= solar_flags
all_flagged_times = np.all(flags, axis=1)

def predict_auto_noise(auto, dt, df, nsamples=1):
    '''Predict noise on an (antenna-averaged) autocorrelation. The product of Delta t and Delta f
    must be unitless. For N autocorrelations averaged together, use nsamples=N.'''
    int_count = int(dt * df) * nsamples
    return np.abs(auto) / np.sqrt(int_count / 2)

# Figure out noise and weights
dt = np.median(np.diff(times))
int_time = 24 * 3600 * dt
chan_res = np.median(np.diff(freqs))
noise = predict_auto_noise(avg_auto, int_time, chan_res, nsamples=len(least_flagged_ants))
wgts = np.where(flags, 0, noise**-2)

# enable handling of missing file gaps
time_grid = np.arange(times[0], times[-1] + dt, dt)
time_indices = {i: np.searchsorted(time_grid, t) for i, t in enumerate(times)}
avg_auto_on_grid = np.zeros((len(time_grid), len(freqs)), dtype=float)
wgts_on_grid = np.zeros((len(time_grid), len(freqs)), dtype=float)
flags_on_grid = np.ones((len(time_grid), len(freqs)), dtype=bool)
for i in time_indices:
    avg_auto_on_grid[time_indices[i], :] = avg_auto[i, :]
    wgts_on_grid[time_indices[i], :] = wgts[i, :]
    flags_on_grid[time_indices[i], :] = flags[i, :]
all_flagged_times_on_grid = np.all(flags_on_grid, axis=1)

time_filters, freq_filters = dpss_filters(freqs=freqs, # Hz
                                          times=time_grid, # JD
                                          freq_scale=FREQ_FILTER_SCALE,
                                          time_scale=TIME_FILTER_SCALE,
                                          eigenval_cutoff=EIGENVAL_CUTOFF)

model, fit_info = solve_2D_DPSS(avg_auto_on_grid, wgts_on_grid, time_filters, freq_filters, method='lu_solve')

An NVIDIA GPU may be present on this machine, but a CUDA-enabled jaxlib is not installed. Falling back to cpu.

noise_model = predict_auto_noise(np.abs(model), int_time, chan_res, nsamples=len(least_flagged_ants))
zscore = ((avg_auto_on_grid - model) / noise_model).real

def plot_z_score(flags, zscore):
    plt.figure(figsize=(14,10), dpi=100)
    extent = [freqs[0]/1e6, freqs[-1]/1e6, times[-1] - int(times[0]), times[0] - int(times[0])]
    plt.imshow(np.where(flags, np.nan, zscore.real), aspect='auto', cmap='bwr', interpolation='none', vmin=-SMOOTHED_ABS_Z_THRESH, vmax=SMOOTHED_ABS_Z_THRESH, extent=extent)
    plt.colorbar(location='top', label='z score', extend='both')
    plt.xlabel('Frequency (MHz)')
    plt.ylabel(f'JD - {int(times[0])}')
    plt.tight_layout()

plot_z_score(flags_on_grid, zscore)

# Propose flags for night times when there's too much temporal structure due to, e.g. lightning
sigma = TIME_FILTER_SCALE / int_time
metric = np.nanmean(np.where(flags_on_grid, np.nan, np.abs(zscore)), axis=1)
kernel = np.exp(-np.arange(-len(metric) // 2, len(metric) // 2 + 1)**2 / 2 / sigma**2)
kernel /= np.sum(kernel)
convolved_metric = np.full_like(metric, np.nan)
convolved_metric[~all_flagged_times_on_grid] = convolve(metric[~all_flagged_times_on_grid], kernel, mode='reflect')
apriori_time_flags = np.ones_like(metric, dtype=bool)
apriori_time_flags[~all_flagged_times_on_grid] = metric_convolution_flagging(metric[~all_flagged_times_on_grid], convolved_metric[~all_flagged_times_on_grid] >= SMOOTHED_ABS_Z_THRESH, 
                                                                             [0, SMOOTHED_ABS_Z_THRESH], sigma=(TIME_FILTER_SCALE / int_time), max_flag_gap=0)

Mean of empty slice

# Flag whole day if too much of the day is flagged
apriori_flag_frac = np.mean(apriori_time_flags[~all_flagged_times_on_grid])
if apriori_flag_frac > WHOLE_DAY_FLAG_THRESH:
    print(f'A priori time flag fraction of {apriori_flag_frac:.2%} is greater than {WHOLE_DAY_FLAG_THRESH:.2%}... Flagging whole day.')
    apriori_time_flags = np.ones_like(apriori_time_flags)

def apriori_flag_plot():
    plt.figure(figsize=(14, 6))
    plt.semilogy(time_grid - int(time_grid[0]), np.nanmean(np.where(flags_on_grid, np.nan, np.abs(zscore)), axis=1), label='Average |z| Over Frequency')
    plt.semilogy(time_grid - int(time_grid[0]), convolved_metric, label=f'Convolved on {TIME_FILTER_SCALE}-second timescale')
    plt.axhline(SMOOTHED_ABS_Z_THRESH, color='k', ls='--', label='Threshold on Convolved |z|')
    for i, apf in enumerate(true_stretches(apriori_time_flags)):
        plt.axvspan((time_grid - int(time_grid[0]))[apf.start], (time_grid - int(time_grid[0]))[apf.stop - 1], color='r', zorder=0, alpha=.3, 
                    label=('Proposed A Priori Flags' if i == 0 else None))
    plt.legend()

    plt.xlabel(f'JD - {int(time_grid[0])}')
    plt.ylabel('Frequency Averaged |z-score|')
    plt.tight_layout()

apriori_flag_plot()

Mean of empty slice

dt = int_time / 3600 / 24 / 2
out_yml_str = 'JD_flags: ' + str([[time_grid[apf][0] - dt / 2, time_grid[apf][-1] + dt / 2] for apf in true_stretches(apriori_time_flags)])
out_yml_str += '\n\nfreq_flags: ' + str([[freqs[apf][0] - chan_res / 2, freqs[apf][-1] + chan_res / 2] for apf in true_stretches(np.all(flags, axis=0))])
out_yml_str += '\n\nex_ants: ' + str(sorted([ant for ant in all_ants if ant_flag_fracs[ant] == 1])).replace("'", "").replace('(', '[').replace(')', ']')
out_yml_str += '\n\nall_ant: ' + str(sorted(all_ants)).replace("'", "").replace('(', '[').replace(')', ']')
out_yml_str += f'\n\njd_range: [{times.min()}, {times.max()}]'
out_yml_str += f'\n\nfreq_range: [{freqs.min()}, {freqs.max()}]'

print(f'Writing the following to {out_yaml_file}\n' + '-' * (25 + len(out_yaml_file)))
print(out_yml_str)
with open(out_yaml_file, 'w') as outfile:
    outfile.writelines(out_yml_str)

Writing the following to /mnt/sn1/data2/2460542/2460542_apriori_flags.yaml
--------------------------------------------------------------------------
JD_flags: [[2460542.168925961, 2460542.171889936]]

freq_flags: [[62484741.2109375, 62606811.5234375], [87509155.2734375, 108016967.7734375], [124984741.2109375, 125106811.5234375], [141464233.3984375, 141586303.7109375], [142074584.9609375, 142318725.5859375], [147445678.7109375, 147567749.0234375], [187484741.2109375, 187606811.5234375], [223129272.4609375, 223251342.7734375]]

ex_ants: [[8, Jee], [8, Jnn], [15, Jnn], [18, Jee], [18, Jnn], [22, Jnn], [27, Jee], [27, Jnn], [28, Jee], [33, Jnn], [34, Jee], [40, Jnn], [46, Jee], [47, Jee], [47, Jnn], [61, Jee], [61, Jnn], [63, Jee], [63, Jnn], [64, Jee], [64, Jnn], [77, Jee], [77, Jnn], [78, Jee], [78, Jnn], [86, Jnn], [87, Jee], [88, Jee], [88, Jnn], [90, Jee], [90, Jnn], [98, Jnn], [104, Jnn], [106, Jee], [106, Jnn], [107, Jee], [107, Jnn], [109, Jnn], [120, Jee], [125, Jee], [125, Jnn], [126, Jee], [126, Jnn], [161, Jnn], [170, Jee], [171, Jnn], [176, Jee], [176, Jnn], [177, Jee], [177, Jnn], [178, Jee], [178, Jnn], [179, Jee], [179, Jnn], [180, Jnn], [182, Jee], [200, Jee], [201, Jnn], [218, Jnn], [232, Jee], [241, Jee], [241, Jnn], [242, Jee], [242, Jnn], [243, Jee], [243, Jnn], [251, Jee], [272, Jee], [272, Jnn], [329, Jee], [329, Jnn], [332, Jee], [332, Jnn], [333, Jee], [333, Jnn]]

all_ant: [[3, Jee], [3, Jnn], [4, Jee], [4, Jnn], [5, Jee], [5, Jnn], [7, Jee], [7, Jnn], [8, Jee], [8, Jnn], [9, Jee], [9, Jnn], [10, Jee], [10, Jnn], [15, Jee], [15, Jnn], [16, Jee], [16, Jnn], [17, Jee], [17, Jnn], [18, Jee], [18, Jnn], [19, Jee], [19, Jnn], [20, Jee], [20, Jnn], [21, Jee], [21, Jnn], [22, Jee], [22, Jnn], [27, Jee], [27, Jnn], [28, Jee], [28, Jnn], [29, Jee], [29, Jnn], [30, Jee], [30, Jnn], [31, Jee], [31, Jnn], [32, Jee], [32, Jnn], [33, Jee], [33, Jnn], [34, Jee], [34, Jnn], [35, Jee], [35, Jnn], [36, Jee], [36, Jnn], [37, Jee], [37, Jnn], [38, Jee], [38, Jnn], [40, Jee], [40, Jnn], [41, Jee], [41, Jnn], [42, Jee], [42, Jnn], [45, Jee], [45, Jnn], [46, Jee], [46, Jnn], [47, Jee], [47, Jnn], [48, Jee], [48, Jnn], [49, Jee], [49, Jnn], [50, Jee], [50, Jnn], [51, Jee], [51, Jnn], [52, Jee], [52, Jnn], [53, Jee], [53, Jnn], [54, Jee], [54, Jnn], [55, Jee], [55, Jnn], [56, Jee], [56, Jnn], [57, Jee], [57, Jnn], [61, Jee], [61, Jnn], [62, Jee], [62, Jnn], [63, Jee], [63, Jnn], [64, Jee], [64, Jnn], [65, Jee], [65, Jnn], [66, Jee], [66, Jnn], [67, Jee], [67, Jnn], [68, Jee], [68, Jnn], [69, Jee], [69, Jnn], [70, Jee], [70, Jnn], [71, Jee], [71, Jnn], [72, Jee], [72, Jnn], [73, Jee], [73, Jnn], [77, Jee], [77, Jnn], [78, Jee], [78, Jnn], [79, Jee], [79, Jnn], [80, Jee], [80, Jnn], [81, Jee], [81, Jnn], [82, Jee], [82, Jnn], [83, Jee], [83, Jnn], [84, Jee], [84, Jnn], [85, Jee], [85, Jnn], [86, Jee], [86, Jnn], [87, Jee], [87, Jnn], [88, Jee], [88, Jnn], [89, Jee], [89, Jnn], [90, Jee], [90, Jnn], [91, Jee], [91, Jnn], [92, Jee], [92, Jnn], [93, Jee], [93, Jnn], [94, Jee], [94, Jnn], [95, Jee], [95, Jnn], [96, Jee], [96, Jnn], [97, Jee], [97, Jnn], [98, Jee], [98, Jnn], [99, Jee], [99, Jnn], [100, Jee], [100, Jnn], [101, Jee], [101, Jnn], [102, Jee], [102, Jnn], [103, Jee], [103, Jnn], [104, Jee], [104, Jnn], [105, Jee], [105, Jnn], [106, Jee], [106, Jnn], [107, Jee], [107, Jnn], [108, Jee], [108, Jnn], [109, Jee], [109, Jnn], [110, Jee], [110, Jnn], [111, Jee], [111, Jnn], [112, Jee], [112, Jnn], [113, Jee], [113, Jnn], [114, Jee], [114, Jnn], [115, Jee], [115, Jnn], [116, Jee], [116, Jnn], [117, Jee], [117, Jnn], [118, Jee], [118, Jnn], [119, Jee], [119, Jnn], [120, Jee], [120, Jnn], [121, Jee], [121, Jnn], [122, Jee], [122, Jnn], [123, Jee], [123, Jnn], [124, Jee], [124, Jnn], [125, Jee], [125, Jnn], [126, Jee], [126, Jnn], [127, Jee], [127, Jnn], [128, Jee], [128, Jnn], [129, Jee], [129, Jnn], [130, Jee], [130, Jnn], [131, Jee], [131, Jnn], [132, Jee], [132, Jnn], [133, Jee], [133, Jnn], [134, Jee], [134, Jnn], [135, Jee], [135, Jnn], [136, Jee], [136, Jnn], [137, Jee], [137, Jnn], [138, Jee], [138, Jnn], [139, Jee], [139, Jnn], [140, Jee], [140, Jnn], [141, Jee], [141, Jnn], [142, Jee], [142, Jnn], [143, Jee], [143, Jnn], [144, Jee], [144, Jnn], [145, Jee], [145, Jnn], [146, Jee], [146, Jnn], [147, Jee], [147, Jnn], [148, Jee], [148, Jnn], [149, Jee], [149, Jnn], [150, Jee], [150, Jnn], [151, Jee], [151, Jnn], [152, Jee], [152, Jnn], [153, Jee], [153, Jnn], [154, Jee], [154, Jnn], [155, Jee], [155, Jnn], [159, Jee], [159, Jnn], [160, Jee], [160, Jnn], [161, Jee], [161, Jnn], [162, Jee], [162, Jnn], [163, Jee], [163, Jnn], [164, Jee], [164, Jnn], [165, Jee], [165, Jnn], [166, Jee], [166, Jnn], [167, Jee], [167, Jnn], [168, Jee], [168, Jnn], [169, Jee], [169, Jnn], [170, Jee], [170, Jnn], [171, Jee], [171, Jnn], [172, Jee], [172, Jnn], [173, Jee], [173, Jnn], [174, Jee], [174, Jnn], [175, Jee], [175, Jnn], [176, Jee], [176, Jnn], [177, Jee], [177, Jnn], [178, Jee], [178, Jnn], [179, Jee], [179, Jnn], [180, Jee], [180, Jnn], [181, Jee], [181, Jnn], [182, Jee], [182, Jnn], [183, Jee], [183, Jnn], [184, Jee], [184, Jnn], [185, Jee], [185, Jnn], [186, Jee], [186, Jnn], [187, Jee], [187, Jnn], [188, Jee], [188, Jnn], [189, Jee], [189, Jnn], [190, Jee], [190, Jnn], [191, Jee], [191, Jnn], [192, Jee], [192, Jnn], [193, Jee], [193, Jnn], [194, Jee], [194, Jnn], [195, Jee], [195, Jnn], [196, Jee], [196, Jnn], [197, Jee], [197, Jnn], [198, Jee], [198, Jnn], [199, Jee], [199, Jnn], [200, Jee], [200, Jnn], [201, Jee], [201, Jnn], [202, Jee], [202, Jnn], [204, Jee], [204, Jnn], [205, Jee], [205, Jnn], [206, Jee], [206, Jnn], [207, Jee], [207, Jnn], [208, Jee], [208, Jnn], [209, Jee], [209, Jnn], [210, Jee], [210, Jnn], [211, Jee], [211, Jnn], [212, Jee], [212, Jnn], [213, Jee], [213, Jnn], [214, Jee], [214, Jnn], [215, Jee], [215, Jnn], [216, Jee], [216, Jnn], [217, Jee], [217, Jnn], [218, Jee], [218, Jnn], [220, Jee], [220, Jnn], [221, Jee], [221, Jnn], [222, Jee], [222, Jnn], [223, Jee], [223, Jnn], [224, Jee], [224, Jnn], [225, Jee], [225, Jnn], [226, Jee], [226, Jnn], [227, Jee], [227, Jnn], [228, Jee], [228, Jnn], [229, Jee], [229, Jnn], [231, Jee], [231, Jnn], [232, Jee], [232, Jnn], [233, Jee], [233, Jnn], [234, Jee], [234, Jnn], [235, Jee], [235, Jnn], [237, Jee], [237, Jnn], [238, Jee], [238, Jnn], [239, Jee], [239, Jnn], [240, Jee], [240, Jnn], [241, Jee], [241, Jnn], [242, Jee], [242, Jnn], [243, Jee], [243, Jnn], [244, Jee], [244, Jnn], [245, Jee], [245, Jnn], [246, Jee], [246, Jnn], [250, Jee], [250, Jnn], [251, Jee], [251, Jnn], [252, Jee], [252, Jnn], [253, Jee], [253, Jnn], [255, Jee], [255, Jnn], [256, Jee], [256, Jnn], [261, Jee], [261, Jnn], [262, Jee], [262, Jnn], [266, Jee], [266, Jnn], [267, Jee], [267, Jnn], [268, Jee], [268, Jnn], [269, Jee], [269, Jnn], [270, Jee], [270, Jnn], [272, Jee], [272, Jnn], [281, Jee], [281, Jnn], [282, Jee], [282, Jnn], [283, Jee], [283, Jnn], [285, Jee], [285, Jnn], [295, Jee], [295, Jnn], [320, Jee], [320, Jnn], [321, Jee], [321, Jnn], [323, Jee], [323, Jnn], [324, Jee], [324, Jnn], [325, Jee], [325, Jnn], [326, Jee], [326, Jnn], [327, Jee], [327, Jnn], [328, Jee], [328, Jnn], [329, Jee], [329, Jnn], [331, Jee], [331, Jnn], [332, Jee], [332, Jnn], [333, Jee], [333, Jnn], [336, Jee], [336, Jnn], [340, Jee], [340, Jnn]]

jd_range: [2460542.168953923, 2460542.520492522]

freq_range: [46920776.3671875, 234298706.0546875]

# check output yaml file
flagged_indices = metrics_io.read_a_priori_int_flags(out_yaml_file, time_grid)
for i, apf in enumerate(apriori_time_flags):
    if i in flagged_indices:
        assert apf
    else:
        assert not apf
flagged_chans = metrics_io.read_a_priori_chan_flags(out_yaml_file, freqs=freqs)
for chan in range(len(freqs)):
    if chan in flagged_chans:
        assert np.all(flags[:, chan])
    else:
        assert not np.all(flags[:, chan])
flagged_ants = metrics_io.read_a_priori_ant_flags(out_yaml_file)
for ant in all_ants:
    if ant_flag_fracs[ant] == 1:
        assert ant in flagged_ants
    else:
        assert ant not in flagged_ants

for repo in ['hera_cal', 'hera_qm', 'hera_filters', 'hera_notebook_templates', 'pyuvdata']:
    exec(f'from {repo} import __version__')
    print(f'{repo}: {__version__}')

hera_cal: 3.6.dev110+gcc0a13d
hera_qm: 2.1.5.dev6+g23b7cf7
hera_filters: 0.1.5

hera_notebook_templates: 0.1.dev734+g90f16f4
pyuvdata: 2.4.2

print(f'Finished execution in {(time.time() - tstart) / 60:.2f} minutes.')

Finished execution in 462.85 minutes.

Full-Day Autocorrelation Checking¶

• Figure 1: Preliminary Array Flag Fraction Summary ¶

• Figure 2: z-Score of DPSS-Filtered, Averaged Good Autocorrelation and Initial Flags ¶

• Figure 3: Proposed A Priori Time Flags Based on Frequency-Averaged and Convolved z-Score Magnitude ¶

Parse input and output files¶

Parse settings¶

Classify Antennas and Find RFI Per-File¶

Figure 1: Preliminary Array Flag Fraction Summary¶

Load and Average Unflagged Autocorrelations¶

DPSS Filter Average Autocorrlations¶

Figure 2: z-Score of DPSS-Filtered, Averaged Good Autocorrelation and Initial Flags¶

Find Bad Time Ranges¶

Figure 3: Proposed A Priori Time Flags Based on Frequency-Averaged and Convolved z-Score Magnitude¶

Write a priori flags to a yaml¶

Metadata¶

Full-Day Autocorrelation Checking¶

• Figure 1: Preliminary Array Flag Fraction Summary¶

• Figure 2: z-Score of DPSS-Filtered, Averaged Good Autocorrelation and Initial Flags¶

• Figure 3: Proposed A Priori Time Flags Based on Frequency-Averaged and Convolved z-Score Magnitude¶

Parse input and output files¶

Parse settings¶

Classify Antennas and Find RFI Per-File¶

Figure 1: Preliminary Array Flag Fraction Summary¶

Load and Average Unflagged Autocorrelations¶

DPSS Filter Average Autocorrlations¶

Figure 2: z-Score of DPSS-Filtered, Averaged Good Autocorrelation and Initial Flags¶

Find Bad Time Ranges¶

Figure 3: Proposed A Priori Time Flags Based on Frequency-Averaged and Convolved z-Score Magnitude¶

Write a priori flags to a yaml¶

Metadata¶

• Figure 1: Preliminary Array Flag Fraction Summary ¶

• Figure 2: z-Score of DPSS-Filtered, Averaged Good Autocorrelation and Initial Flags ¶

• Figure 3: Proposed A Priori Time Flags Based on Frequency-Averaged and Convolved z-Score Magnitude ¶