diff --git a/LAMDA/bias_rmse_timeseries.py b/LAMDA/bias_rmse_timeseries.py new file mode 100644 index 0000000..d8fefab --- /dev/null +++ b/LAMDA/bias_rmse_timeseries.py @@ -0,0 +1,104 @@ +import pandas as pd +import numpy as np +import matplotlib.pyplot as plt +from pyGSI.gsi_stat import GSIstat +from emcpy.plots.plots import LinePlot, HorizontalLine +from emcpy.plots import CreatePlot + + +def _plot_bias_rmse_timeseries(df, config, outdir): + """ + Used indexed df to plot rmse and bias. + """ + df = df.reset_index() + + # Grab data from dataframe + cycles = df['date'].dt.strftime("%d %b\n%Y %Hz") + omf_bc = df['OmF_bc'] + omf_wobc = df['OmF_wobc'] + rmse = df['rms'] + + # Create bias correction object + bc_line = LinePlot(cycles, omf_bc) + bc_line.label = 'Bias w/ BC' + bc_line.marker = 'o' + bc_line.markersize = 3 + bc_line.linestyle = '-.' + + # Create object without bias correction + wobc_line = LinePlot(cycles, omf_wobc) + wobc_line.color = 'tab:green' + wobc_line.label = 'Bias w/o BC' + wobc_line.marker = 'o' + wobc_line.markersize = 3 + wobc_line.linestyle = '--' + + # Create rmse + rmse_line = LinePlot(cycles, rmse) + rmse_line.color = 'tab:brown' + rmse_line.label = 'RMSE' + rmse_line.marker = 'o' + rmse_line.markersize = 3 + + # Create a line at 0 + zero_line = HorizontalLine(y=0) + + # Create plot and draw data + myplt = CreatePlot(figsize=(10, 6)) + plt_list = [bc_line, wobc_line, rmse_line, zero_line] + myplt.draw_data(plt_list) + + # Add features + myplt.set_ylim(-5, 5) + myplt.add_grid(linewidth=0.5, color='grey', linestyle='--') + myplt.add_legend(loc='lower right', fontsize='large') + + title = (f"{config['bias type']} RMSE and Bias Time Series\n{config['sensor']} " + f"{config['satellite']} Channel {config['channel']} tm0{config['tm']}") + myplt.add_title(title, fontsize=14) + + # Return matplotlib figure + fig = myplt.return_figure() + + # Save figure + save_cycles = df['date'].dt.strftime('%Y%m%d%H').to_numpy() + savefile = (f"{save_cycles[0]}_{save_cycles[-1]}_{config['sensor']}" + f"_{config['satellite']}_channel_{config['channel']}" + f"_{config['bias type']}_tm0{config['tm']}_rmse_bias_timeseries.png") + fig.savefig('./' + savefile, bbox_inches='tight', + pad_inches=0.1) + plt.close('all') + + +def bias_rmse_timeseries(df, config, outdir): + """ + Plots a timeseries of bias and rmse. + + Args: + df : (pandas dataframe) multidimensional pandas dataframe + with several cycles of gsi stats data + config : (dict) dictionary including informaton about the data + being plotted + outdir : (str) path to output figures + """ + # Select data by satellite and channel + for idx_col in ['satellite', 'channel']: + indx = df.index.get_level_values(idx_col) == '' + indx = np.ma.logical_or(indx, df.index.get_level_values(idx_col) == config[idx_col]) + df = df.iloc[indx] + + # Create omf and oma df + indx = df.index.get_level_values(idx_col) == '' + omf_indx = np.ma.logical_or(indx, df.index.get_level_values('it') == 1) + omf_df = df.iloc[omf_indx] + + oma_indx = np.ma.logical_or(indx, df.index.get_level_values('it') == 3) + oma_df = df.iloc[oma_indx] + + # Plot omf + config['bias type'] = 'OmF' + _plot_bias_rmse_timeseries(omf_df, config, outdir) + + # Plot oma + config['bias type'] = 'OmA' + _plot_bias_rmse_timeseries(oma_df, config, outdir) diff --git a/LAMDA/scripts/LAMDA_stats_workflow.py b/LAMDA/scripts/LAMDA_stats_workflow.py index 0cce1ba..f3ff37a 100644 --- a/LAMDA/scripts/LAMDA_stats_workflow.py +++ b/LAMDA/scripts/LAMDA_stats_workflow.py @@ -13,6 +13,7 @@ # import the plotting scripts i.e: # from LAMDA.obs_count import plot_obscount from LAMDA.minimization_plots import minimization_plots +from LAMDA.bias_rmse_timeseries import bias_rmse_timeseries def concatenate_dfs(files, variable, cycles, data_type): @@ -45,7 +46,8 @@ def concatenate_dfs(files, variable, cycles, data_type): return concatenated_df -def plotting(config, data_dict, outdir, data_type, ob_type): +def plotting(config, data_dict, outdir, data_type, ob_type, + experiment_name,): """ Main plotting function that gets all inputs for plotting scripts in order which includes fits_df, plotting_config, and outdir. @@ -60,13 +62,18 @@ def plotting(config, data_dict, outdir, data_type, ob_type): data_type : (str) determines if data is conventional or radiance ob_type : (str) the observation type + experiment_name : (str) the type of experiment i.e. + FV3LAMDA, LAMDAX, etc. """ data_inputs = config[0] plot_type = config[-1] - plotting_config = {} - plotting_config['ob_type'] = ob_type + plotting_config = { + 'ob type': ob_type, + 'data type': data_type, + 'experiment name': experiment_name + } if data_type == 'conventional': plotting_config['obsid'] = data_inputs[0] @@ -74,15 +81,19 @@ def plotting(config, data_dict, outdir, data_type, ob_type): plotting_config['obuse'] = data_inputs[-1] elif data_type == 'radiance': + plotting_config['sensor'] = ob_type.split('_')[0] + plotting_config['satellite'] = ob_type.split('_')[-1] + plotting_config['channel'] = data_inputs[0] + plotting_config['obuse'] = data_inputs[-1] + # change ob_type to just the sensor to utilize # GSIstat extract_sensor() ob_type = ob_type.split('_')[0] - plotting_config['channel'] = data_inputs[0] - plotting_config['obuse'] = data_inputs[-1] # Loop through t-minus hours to grab proper data and # generate plots for tm in np.arange(7): + plotting_config['tm'] = tm fits_data = [] cycles = [] @@ -94,7 +105,7 @@ def plotting(config, data_dict, outdir, data_type, ob_type): fits_df = concatenate_dfs(fits_data, ob_type, cycles, data_type) plot_dict = { - 'obs count': plot_obscount, + 'bias rmse timeseries': bias_rmse_timeseries } plot_dict[plot_type](fits_df, plotting_config, outdir) @@ -196,7 +207,8 @@ def stats_workflow(config_yaml, nprocs, outdir): # Create multiprocessing Pool p = Pool(processes=nprocs) p.map(partial(plotting, data_dict=data_dict, outdir=outdir, - ob_type=ob_type, data_type=data_type), work_list) + ob_type=ob_type, data_type=data_type, + experiment_name=experiment_name), work_list) if __name__ == '__main__': diff --git a/pyGSI/gsi_stat.py b/pyGSI/gsi_stat.py index e3a953f..ae510d9 100644 --- a/pyGSI/gsi_stat.py +++ b/pyGSI/gsi_stat.py @@ -149,18 +149,18 @@ def extract_instrument(self, obtype, instrument): tmp.append(tst) columns = ['it', 'channel', 'instrument', 'satellite', - 'nassim', 'nrej', 'oberr', 'OmF_bc', 'OmF_wobc', - 'col1', 'col2', 'col3'] + 'nassim', 'nrej', 'oberr', 'OmF_wobc', 'OmF_bc', + 'pen', 'rms', 'std'] df = pd.DataFrame(data=tmp, columns=columns) - df.drop(['col1', 'col2', 'col3'], inplace=True, axis=1) +# df.drop(['col1', 'col2', 'col3'], inplace=True, axis=1) df[['channel', 'nassim', 'nrej']] = df[[ 'channel', 'nassim', 'nrej']].astype(np.int) - df[['oberr', 'OmF_bc', 'OmF_wobc']] = df[[ - 'oberr', 'OmF_bc', 'OmF_wobc']].astype(np.float) + df[['oberr', 'OmF_wobc', 'OmF_bc', 'pen', 'rms', 'std']] = df[[ + 'oberr', 'OmF_wobc', 'OmF_bc', 'pen', 'rms', 'std']].astype(np.float) # Since iteration number is not readily available, make one lendf = len(df) - nouter = np.int(lendf / len(df['it'].unique())) + nouter = np.int(np.round(lendf / len(df['it'].unique()))) douter = np.int(lendf / nouter) it = np.zeros(lendf, dtype=int) for i in range(nouter): @@ -170,7 +170,8 @@ def extract_instrument(self, obtype, instrument): df['it'] = it df = df[['it', 'instrument', 'satellite', 'channel', - 'nassim', 'nrej', 'oberr', 'OmF_bc', 'OmF_wobc']] + 'nassim', 'nrej', 'oberr', 'OmF_wobc', 'OmF_bc', + 'pen', 'rms', 'std']] df.set_index(['it', 'instrument', 'satellite', 'channel'], inplace=True)