diff --git a/LAMDA/LAMDA_tools.py b/LAMDA/LAMDA_tools.py new file mode 100644 index 0000000..4208047 --- /dev/null +++ b/LAMDA/LAMDA_tools.py @@ -0,0 +1,56 @@ +import numpy as np +import pandas as pd + + +def land_fraction(self, df): + """ + Returns dataframe for only land data. + """ + df = df[df['land_fraction'] > 0] + + return df + + +def water_fraction(self, df): + """ + Returns dataframe for only water data. + """ + df = df[df['water_fraction'] > 0] + + return df + + +def cloud_fraction(self, df): + """ + Returns dataframe for only cloud data. + """ + df = df[df['cloud_fraction'] > 0] + + return df + + +def vegetation_fraction(self, df): + """ + Returns dataframe for only vegetation data. + """ + df = df[df['vegetation_fraction'] > 0] + + return df + + +def ice_fraction(self, df): + """ + Returns dataframe for only ice data. + """ + df = df[df['ice_fraction'] > 0] + + return df + + +def snow_fraction(self, df): + """ + Returns dataframe for only snow data. + """ + df = df[df['snow_fraction'] > 0] + + return df diff --git a/LAMDA/__init__.py b/LAMDA/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/LAMDA/bias_rmse_timeseries.py b/LAMDA/bias_rmse_timeseries.py new file mode 100644 index 0000000..2f3a451 --- /dev/null +++ b/LAMDA/bias_rmse_timeseries.py @@ -0,0 +1,202 @@ +import pandas as pd +import numpy as np +import matplotlib.pyplot as plt +from pyGSI.gsi_stat import GSIstat +from emcpy.plots.plots import LinePlot, HorizontalLine +from emcpy.plots import CreatePlot + + +def _get_conventional_data(df, config): + """ + Grabs appropriate data from the dataframe + and creates EMCPy plot list, title and + save file name for conventional data. + """ + + indx = df.index.get_level_values('stat') == '' + bias_indx = np.ma.logical_or(indx, df.index.get_level_values('stat') == 'bias') + rmse_indx = np.ma.logical_or(indx, df.index.get_level_values('stat') == 'rms') + bias_df = df.iloc[bias_indx].reset_index() + rmse_df = df.iloc[rmse_indx].reset_index() + + data_col = rmse_df.columns[-1] + + # Grab data from dataframe + cycles = rmse_df['date'].dt.strftime("%d %b\n%Y %Hz") + rmse = rmse_df[data_col] + bias = bias_df[data_col] + + # Create bias object + bias_line = LinePlot(cycles, bias) + bias_line.label = 'Bias' + bias_line.marker = 'o' + bias_line.markersize = 3 + + # Create rmse + rmse_line = LinePlot(cycles, rmse) + rmse_line.color = 'tab:green' + rmse_line.label = 'RMSE' + rmse_line.marker = 'o' + rmse_line.markersize = 3 + + # Create a line at 0 + zero_line = HorizontalLine(y=0) + + # Add objects to plot list + plot_list = [bias_line, rmse_line, zero_line] + + # Create title + title = (f"{config['experiment name']} {config['bias type']} " + f"RMSE and Bias Time Series\nOb Type: {config['ob type']} " + f"Type: {config['obsid']} Subtype: {config['subtype']} " + f"tm0{config['tm']}") + config['title'] = title + + # Create save file name + save_cycles = bias_df['date'].dt.strftime('%Y%m%d%H').to_numpy() + savefile = (f"{save_cycles[0]}_{save_cycles[-1]}_{config['experiment name']}_" + f"{config['ob type']}_{config['obsid']}_{config['subtype']}_" + f"{config['bias type']}_tm0{config['tm']}_rmse_bias_timeseries.png") + config['save file'] = savefile + + return plot_list, config + + +def _get_radiance_data(df, config): + """ + Grabs appropriate data from the dataframe + and creates EMCPy plot list, title and + save file name for radiance data. + """ + + df = df.reset_index() + + # Grab data from dataframe + cycles = df['date'].dt.strftime("%d %b\n%Y %Hz") + omf_bc = df['OmF_bc'] + omf_wobc = df['OmF_wobc'] + rmse = df['rms'] + + # Create bias correction object + bc_line = LinePlot(cycles, omf_bc) + bc_line.label = 'Bias w/ BC' + bc_line.marker = 'o' + bc_line.markersize = 3 + bc_line.linestyle = '-.' + + # Create object without bias correction + wobc_line = LinePlot(cycles, omf_wobc) + wobc_line.color = 'tab:green' + wobc_line.label = 'Bias w/o BC' + wobc_line.marker = 'o' + wobc_line.markersize = 3 + wobc_line.linestyle = '--' + + # Create rmse + rmse_line = LinePlot(cycles, rmse) + rmse_line.color = 'tab:brown' + rmse_line.label = 'RMSE' + rmse_line.marker = 'o' + rmse_line.markersize = 3 + + # Create a line at 0 + zero_line = HorizontalLine(y=0) + + # Add objects to plot list + plot_list = [bc_line, wobc_line, rmse_line, zero_line] + + # Create title + title = (f"{config['experiment name']} {config['bias type']} " + f"RMSE and Bias Time Series \n{config['sensor']} " + f"{config['satellite']} Channel {config['channel']} " + f"tm0{config['tm']}") + config['title'] = title + + # Create save file name + save_cycles = df['date'].dt.strftime('%Y%m%d%H').to_numpy() + + savefile = (f"{save_cycles[0]}_{save_cycles[-1]}_{config['experiment name']}_" + f"{config['sensor']}_{config['satellite']}_channel_{config['channel']}" + f"_{config['bias type']}_tm0{config['tm']}_rmse_bias_timeseries.png") + config['save file'] = savefile + + return plot_list, config + + +def _plot_bias_rmse_timeseries(df, config, outdir): + """ + Used indexed df to plot rmse and bias. + """ + + if config['data type'] == 'radiance': + plot_list, config = _get_radiance_data(df, config) + + elif config['data type'] == 'conventional': + plot_list, config = _get_conventional_data(df, config) + + # Create plot and draw data + myplt = CreatePlot(figsize=(10, 6)) + myplt.draw_data(plot_list) + + # Add features + myplt.set_ylim(-5, 5) + myplt.add_grid(linewidth=0.5, color='grey', linestyle='--') + myplt.add_legend(loc='lower right', fontsize='large') + + myplt.add_title(config['title'], fontsize=14) + + # Return matplotlib figure and save + fig = myplt.return_figure() + fig.savefig(outdir + config['save file'], bbox_inches='tight', + pad_inches=0.1) + plt.close('all') + + +def bias_rmse_timeseries(df, config, outdir): + """ + Plots a timeseries of bias and rmse. + + Args: + df : (pandas dataframe) multidimensional pandas dataframe + with several cycles of gsi stats data + config : (dict) dictionary including informaton about the data + being plotted + outdir : (str) path to output figures + """ + + if config['data type'] == 'radiance': + + # Select data by satellite and channel + for idx_col in ['satellite', 'channel']: + indx = df.index.get_level_values(idx_col) == '' + indx = np.ma.logical_or(indx, df.index.get_level_values(idx_col) == config[idx_col]) + df = df.iloc[indx] + + elif config['data type'] == 'conventional': + for idx_col in ['typ', 'use']: + d = { + 'typ': config['obsid'], + 'styp': config['subtype'], + 'use': 'asm' + } + + config_val = d[idx_col] + indx = df.index.get_level_values(idx_col) == '' + indx = np.ma.logical_or(indx, df.index.get_level_values(idx_col) == config_val) + df = df.iloc[indx] + + # Create omf and oma df + indx = df.index.get_level_values('it') == '' + omf_indx = np.ma.logical_or(indx, df.index.get_level_values('it') == 1) + omf_df = df.iloc[omf_indx] + + oma_indx = np.ma.logical_or(indx, df.index.get_level_values('it') == 3) + oma_df = df.iloc[oma_indx] + + # Plot omf + config['bias type'] = 'OmF' + _plot_bias_rmse_timeseries(omf_df, config, outdir) + + # Plot oma + config['bias type'] = 'OmA' + _plot_bias_rmse_timeseries(oma_df, config, outdir) diff --git a/LAMDA/bias_stddev_channel.py b/LAMDA/bias_stddev_channel.py new file mode 100644 index 0000000..4fe9eb3 --- /dev/null +++ b/LAMDA/bias_stddev_channel.py @@ -0,0 +1,126 @@ +import pandas as pd +import numpy as np +import matplotlib.pyplot as plt +from pyGSI.gsi_stat import GSIstat +from emcpy.plots.plots import LinePlot, HorizontalLine +from emcpy.plots import CreatePlot + + +def _plot_bias_stddev_channel(df, config, outdir): + """ + Plot the bias and standard deviation per channel + for a single cycle and an average for multiple + cycles. + """ + # Create single cycle and average dataframe + cycles = df.index.get_level_values(0).unique() + n_days = len(cycles)/2 + scyc_df = df.loc[cycles[-1]] + avg_df = df.groupby(level=[1, 2, 3, 4]).mean() + + # Get data + channels = df.index.get_level_values(-1).unique().to_numpy() + omf_bc = scyc_df['OmF_bc'] + omf_std = scyc_df['std'] + avg_bc = avg_df['OmF_bc'] + avg_std = avg_df['std'] + + # Create bias correction object + bc_line = LinePlot(channels, omf_bc) + bc_line.color = 'tab:green' + bc_line.label = 'OmF' + bc_line.marker = 'o' + bc_line.markersize = 4 + bc_line.linewidth = 3 + + # Create standard deviation object + std_dev = LinePlot(channels, omf_std) + std_dev.color = 'tab:orange' + std_dev.label = 'Std Dev' + std_dev.marker = 'o' + std_dev.markersize = 4 + std_dev.linewidth = 3 + + # Create average bias correction object + avg_bc_line = LinePlot(channels, avg_bc) + avg_bc_line.color = 'tab:green' + avg_bc_line.label = f'{n_days} Day OmF Average' + avg_bc_line.marker = 'o' + avg_bc_line.markersize = 4 + avg_bc_line.linewidth = 3 + avg_bc_line.linestyle = '--' + + # Create average standard deviation object + avg_std_dev = LinePlot(channels, avg_std) + avg_std_dev.color = 'tab:orange' + avg_std_dev.label = f'{n_days} Day Std Average' + avg_std_dev.marker = 'o' + avg_std_dev.markersize = 4 + avg_std_dev.linewidth = 3 + avg_std_dev.linestyle = '--' + + # Create a line at 0 + zero_line = HorizontalLine(y=0) + zero_line.linewidth = 1.25 + + # Create plot and draw data + myplt = CreatePlot(figsize=(10, 6)) + plt_list = [bc_line, avg_bc_line, std_dev, avg_std_dev, zero_line] + myplt.draw_data(plt_list) + + # Add features + myplt.set_ylim(-3, 3) + myplt.set_xticks(channels) + myplt.add_grid(linewidth=0.5, color='grey', linestyle='--') + myplt.add_legend(loc='lower right', fontsize='large') + myplt.add_xlabel('Channels') + + str_cycles = cycles.strftime('%Y%m%d%H').to_numpy() + left_title = (f"{config['bias type']} (with Bias Correction) - Observed (K)" + f"\n{config['sensor']}_{config['satellite']}") + right_title = f"{str_cycles[-1]}" + myplt.add_title(left_title, loc='left', fontsize=14) + myplt.add_title(right_title, loc='right', fontsize=12, fontweight='semibold') + + # Return matplotlib figure + fig = myplt.return_figure() + savefile = (f"{str_cycles[-1]}_{config['sensor']}_{config['satellite']}" + f"_channel_{config['channel']}_{config['bias type']}" + f"_bias_std_channel.png") + fig.savefig('./' + savefile, bbox_inches='tight', + pad_inches=0.1) + plt.close('all') + + +def bias_stddev_channel(df, config, outdir): + """ + Plots bias and standard deviation per channel. + + Args: + df : (pandas dataframe) multidimensional pandas dataframe + with several cycles of gsi stats data + config : (dict) dictionary including informaton about the data + being plotted + outdir : (str) path to output figures + """ + # Select data by satellite and channel + for idx_col in ['satellite']: + indx = df.index.get_level_values(idx_col) == '' + indx = np.ma.logical_or(indx, df.index.get_level_values(idx_col) == config[idx_col]) + df = df.iloc[indx] + + # Create omf and oma df + indx = df.index.get_level_values(idx_col) == '' + omf_indx = np.ma.logical_or(indx, df.index.get_level_values('it') == 1) + omf_df = df.iloc[omf_indx] + + oma_indx = np.ma.logical_or(indx, df.index.get_level_values('it') == 3) + oma_df = df.iloc[oma_indx] + + # Plot omf + config['bias type'] = 'Ges' + _plot_bias_stddev_channel(omf_df, config, outdir) + + # Plot oma + config['bias type'] = 'Anl' + _plot_bias_stddev_channel(oma_df, config, outdir) diff --git a/LAMDA/layer_histogram.py b/LAMDA/layer_histogram.py new file mode 100644 index 0000000..85d5371 --- /dev/null +++ b/LAMDA/layer_histogram.py @@ -0,0 +1,155 @@ +import numpy as np +import yaml +import os +from pathlib import Path +import matplotlib.pyplot as plt +import LAMDA.plot_features as features +from LAMDA.no_data_plots import no_data_map +from emcpy.plots.map_plots import MapScatter +from emcpy.plots import CreateMap +from emcpy.plots.map_tools import Domain, MapProjection +from pyGSI.diags import Conventional, Radiance, Ozone +from emcpy.plots.plots import Histogram, LinePlot +from emcpy.plots.create_plots import CreatePlot +import matplotlib.mlab as mlab +import scipy.stats as stats + + +def create_bins(data, nbins=50): + """ + Creates an array of the bins to be used. + + Args: + data : (array) data being used in histogram + nbins: (int;default=50) number of bins to use + """ + _max = np.max(np.abs(data)) + bins = np.linspace(-_max, _max, nbins) + + return bins + + +def _create_hist_layer(df_ges, df_anl, domain, + metadata, outdir): + """ + Create the layered histogram figure and plot data omf vs oma. + """ + plot_objects = [] + + lats = df_ges['latitude'].to_numpy() + lons = df_anl['longitude'].to_numpy() + omf = df_ges['omf_adjusted'].to_numpy() + oma = df_anl['omf_adjusted'].to_numpy() + + omf_density = stats.gaussian_kde(omf) + oma_density = stats.gaussian_kde(oma) + + omf_bins = create_bins(omf, 50) + oma_bins = create_bins(oma, 50) + + # Creat Line object using histogram bins and the density + omf_line = LinePlot(omf_bins, omf_density(omf_bins)) + omf_line.color = 'tab:green' + omf_line.label = 'O-B' + oma_line = LinePlot(oma_bins, oma_density(oma_bins)) + oma_line.color = 'tab:purple' + oma_line.label = 'O-A' + + # Create histogram plot and draw data + myplt = CreatePlot() + plt_list = [omf_line, oma_line] + myplt.draw_data(plt_list) + + # Add features + labels = features.get_labels(metadata) + # Titles + labels = features.get_labels(metadata) + myplt.add_title(labels['title'], loc='left', fontsize=12) + myplt.add_title(labels['date title'], loc='right', fontsize=12, + fontweight='semibold') + myplt.add_xlabel(xlabel='FG Departure') + myplt.add_ylabel(ylabel='Normalized Count') + myplt.add_legend() + + # Return matplotlib figure + fig = myplt.return_figure() + str_domain = domain.replace(" ", "_") + str_hist = "layered_histogram" + fig.savefig(outdir + f"{labels['save file']}_{str_domain}_{str_hist}.png", + bbox_inches='tight', pad_inches=0.1) + plt.close('all') + return + + +def layer_histogram(config): + """ + + Create layered histogram plots with omf and oma. + + Args: + config : (dict) configuration file that includes the + appropriate inputs based on file type (i.e. + conventional or radiance data) + """ + + # Get filename to determing what the file type is + filename = os.path.splitext(Path(config['diag file']).stem)[0] + filetype = filename.split('_')[1] + + # get the anl diag file + anl_filename = config['diag file'].replace('ges', 'anl') + + if filetype == 'conv': + diag = Conventional(config['diag file']) + + df = diag.get_data(obsid=[config['observation id']], + subtype=[config['observation subtype']], + analysis_use=config['analysis use']) + metadata = diag.metadata + metadata['ObsID Name'] = features.get_obs_type( + [config['observation id']]) + + else: + diag_ges = Radiance(config['diag file']) + diag_anl = Radiance(anl_filename) + + df_ges = diag_ges.get_data(channel=[config['channel']], + qcflag=[config['qc flag']], + analysis_use=config['analysis use']) + df_anl = diag_anl.get_data(channel=[config['channel']], + qcflag=[config['qc flag']], + analysis_use=config['analysis use']) + metadata = diag_ges.metadata + + # Grab qc flags + qc_unique = sorted(np.unique(np.abs(diag_ges.qc_flags))) + + metadata['Diag Type'] = 'Obs_Minus_Forecast_adjusted' + + # Handles analysis use data + anl_use = metadata['Anl Use'] + + if anl_use: + for anl_type in df_ges.keys(): + metadata['Anl Use Type'] = anl_type + + if filetype == 'conv': + # Need to grab qc_unique here for conv based on + # analysis type (assimilated, rejected, monitored) + qc_unique = sorted(np.unique( + np.abs(df[anl_type]['prep_qc_mark']))) + + _create_hist_layer(df_ges['assimilated'], df_anl['assimilated'], + config['domain'], metadata, config['outdir']) + + else: + metadata['Anl Use Type'] = None + + if filetype == 'conv': + # Need to grab qc_unique here for conv based on + # analysis type (assimilated, rejected, monitored) + qc_unique = sorted(np.unique( + np.abs(df['prep_qc_mark']))) + + _create_hist_layer(df_ges['assimilated'], df_anl['assimilated'], + config['domain'], metadata, config['outdir']) diff --git a/LAMDA/map_departures.py b/LAMDA/map_departures.py new file mode 100644 index 0000000..6dadc99 --- /dev/null +++ b/LAMDA/map_departures.py @@ -0,0 +1,120 @@ +import numpy as np +import yaml +import os +from pathlib import Path +import matplotlib.pyplot as plt +import LAMDA.plot_features as features +from LAMDA.no_data_plots import no_data_map +from emcpy.plots.map_plots import MapScatter +from emcpy.plots import CreateMap +from emcpy.plots.map_tools import Domain, MapProjection +from pyGSI.diags import Conventional, Radiance, Ozone + + +def _create_map_departures(df, domain, projection, + metadata, outdir): + """ + Create the map figure and plot data. + """ + # this for now puts all O-F/O-A together, might want to split by + # monitored/rejected/assim later + # grab variables for plotting + lats = df['latitude'].to_numpy() + lons = df['longitude'].to_numpy() + omf = df['omf_adjusted'].to_numpy() + + plot_objects = [] + + # Create map object + mymap = CreateMap(figsize=(12, 8), + domain=Domain(domain), + proj_obj=MapProjection(projection)) + # Add coastlines and states + mymap.add_features(['coastlines', 'states']) + + # determine vmin/vmax for colorbar and must be symmetric + if len(lats) > 0: + omfscatter = MapScatter(latitude=lats, + longitude=lons, + data=omf) + omfscatter.markersize = 2 + omfscatter.cmap = 'coolwarm' + # determine min/max of values + maxval = np.nanmax(omf) + minval = np.nanmin(omf) + vmax = max(abs(minval), maxval) + omfscatter.vmin = vmax * -1 + omfscatter.vmax = vmax + # add data to plot objects + plot_objects.append(omfscatter) + # labels and annotations + labels = features.get_labels(metadata) + # draw data + mymap.draw_data(plot_objects) + # add labels + mymap.add_colorbar(label=metadata['Diag Type'], + label_fontsize=12, extend='neither') + mymap.add_title(labels['title'], loc='left', fontsize=12) + mymap.add_title(labels['date title'], loc='right', fontsize=12, + fontweight='semibold') + mymap.add_xlabel("Longitude", fontsize=12) + mymap.add_ylabel("Latitude", fontsize=12) + # add stats to figure + stats_dict = { + 'nobs': len(lats), + 'min': str(np.round(minval, 4)), + 'max': str(np.round(maxval, 4)), + } + mymap.add_stats_dict(stats_dict=stats_dict) + # save figure + fig = mymap.return_figure() + str_domain = domain.replace(" ", "_") + plt.savefig(outdir + f"{labels['save file']}_{str_domain}.png", + bbox_inches='tight', pad_inches=0.1) + plt.close('all') + + else: + # no data to plot + fig = no_data_map(mymap, Domain(domain), metadata) + + +def map_departures(config): + """ + Create map of departures (O-F and O-A) + + Args: + config : (dict) configuration file that includes the + appropriate inputs based on file type (i.e. + conventional or radiance data) + """ + + # Get filename to determing what the file type is + filename = os.path.splitext(Path(config['diag file']).stem)[0] + filetype = filename.split('_')[1] + + if filetype == 'conv': + diag = Conventional(config['diag file']) + df = diag.get_data(obsid=[config['observation id']], + subtype=[config['observation subtype']], + analysis_use=config['analysis use']) + metadata = diag.metadata + metadata['ObsID Name'] = features.get_obs_type([config['observation id']]) + + else: + diag = Radiance(config['diag file']) + df = diag.get_data(channel=[config['channel']], + analysis_use=config['analysis use']) + metadata = diag.metadata + + metadata['Diag Type'] = config['data type'] + + anl_use = config['analysis use'] + if anl_use: + for anl_type in df.keys(): + metadata['Anl Use Type'] = anl_type + _create_map_departures(df[anl_type], config['domain'], config['projection'], + metadata, config['outdir']) + else: + metadata['Anl Use Type'] = None + _create_map_departures(df, config['domain'], config['projection'], + metadata, config['outdir']) diff --git a/LAMDA/map_qc_flags.py b/LAMDA/map_qc_flags.py new file mode 100644 index 0000000..fc18f2f --- /dev/null +++ b/LAMDA/map_qc_flags.py @@ -0,0 +1,144 @@ +import numpy as np +import yaml +import os +from pathlib import Path +import matplotlib.pyplot as plt +import LAMDA.plot_features as features +from LAMDA.no_data_plots import no_data_map +from emcpy.plots.map_plots import MapScatter +from emcpy.plots import CreateMap +from emcpy.plots.map_tools import Domain, MapProjection +from pyGSI.diags import Conventional, Radiance, Ozone + + +def _create_map_qc(df, qc_unique, domain, projection, + metadata, outdir): + """ + Create the map figure and plot data. + """ + plot_objects = [] + # Loop through unique QC Flags to create plot objects + for i, flag in enumerate(qc_unique): + if metadata['Diag File Type'] == 'conventional': + indx = df.index[df['prep_qc_mark'] == flag] + else: + indx = (df.index.get_level_values('QC_Flag') == flag) + + lats = df['latitude'][indx].to_numpy() + lons = df['longitude'][indx].to_numpy() + + # If data is not empty, creates scatter object + if len(lats) > 0: + plotobj = MapScatter(latitude=lats, + longitude=lons) + plotobj.color = features.qc_flag_colors(flag) + plotobj.label = flag + plot_objects.append(plotobj) + + # Create map object + mymap = CreateMap(figsize=(12, 8), + domain=Domain(domain), + proj_obj=MapProjection(projection)) + # Add coastlines + mymap.add_features(['coastlines', 'states']) + + # If there is no data, create figure that displays 'No Data' + if len(plot_objects) == 0: + fig = no_data_map(mymap, Domain(domain), metadata) + + else: + # Draw data + mymap.draw_data(plot_objects) + + # Add legend + ncol = 2 if len(plot_objects) > 4 else 1 + legend = mymap.add_legend(loc='lower left', ncol=ncol, + title='QC Flags') + + # Titles + labels = features.get_labels(metadata) + mymap.add_title(labels['title'], loc='left', fontsize=12) + mymap.add_title(labels['date title'], loc='right', fontsize=12, + fontweight='semibold') + + # X and Y labels + mymap.add_xlabel("Longitude", fontsize=12) + mymap.add_ylabel("Latitude", fontsize=12) + + # Return figure + fig = mymap.return_figure() + + str_domain = domain.replace(" ", "_") + plt.savefig(outdir + f"{labels['save file']}_{str_domain}.png", + bbox_inches='tight', pad_inches=0.1) + plt.close('all') + + return + + +def map_qc_flags(config): + """ + Create map plotting location of qcflags. + + Args: + config : (dict) configuration file that includes the + appropriate inputs based on file type (i.e. + conventional or radiance data) + """ + + # Get filename to determing what the file type is + filename = os.path.splitext(Path(config['diag file']).stem)[0] + filetype = filename.split('_')[1] + + if filetype == 'conv': + diag = Conventional(config['diag file']) + + df = diag.get_data(obsid=[config['observation id']], + subtype=[config['observation subtype']], + analysis_use=config['analysis use']) + metadata = diag.metadata + metadata['ObsID Name'] = features.get_obs_type( + [config['observation id']]) + + else: + diag = Radiance(config['diag file']) + + df = diag.get_data(channel=[config['channel']], + qcflag=[config['qc flag']], + analysis_use=config['analysis use']) + metadata = diag.metadata + + # Grab qc flags + qc_unique = sorted(np.unique(np.abs(diag.qc_flags))) + + metadata['Diag Type'] = 'QC_Flags' + + # Handles analysis use data + anl_use = metadata['Anl Use'] + + if anl_use: + for anl_type in df.keys(): + metadata['Anl Use Type'] = anl_type + + if filetype == 'conv': + # Need to grab qc_unique here for conv based on + # analysis type (assimilated, rejected, monitored) + qc_unique = sorted(np.unique( + np.abs(df[anl_type]['prep_qc_mark']))) + + _create_map_qc(df[anl_type], qc_unique, + config['domain'], config['projection'], + metadata, config['outdir']) + + else: + metadata['Anl Use Type'] = None + + if filetype == 'conv': + # Need to grab qc_unique here for conv based on + # analysis type (assimilated, rejected, monitored) + qc_unique = sorted(np.unique( + np.abs(df['prep_qc_mark']))) + + _create_map_qc(df, qc_unique, config['domain'], + config['projection'], metadata, + config['outdir']) diff --git a/LAMDA/minimization_plots.py b/LAMDA/minimization_plots.py new file mode 100644 index 0000000..e0e803f --- /dev/null +++ b/LAMDA/minimization_plots.py @@ -0,0 +1,138 @@ +import numpy as np +import pandas as pd +from datetime import datetime +import matplotlib.pyplot as plt +from emcpy.plots.plots import LinePlot +from emcpy.plots import CreatePlot + +__all__ = ['plot_minimization'] + + +def _plot_cost_function(df, config, outdir): + """ + Use data from dataframe to plot the cost function. + """ + # Get cycle info + cycle = df.index.get_level_values(0)[-1] + cyclestr = datetime.strftime(cycle, '%Y%m%d%H') + n_cycles = len(np.unique(df.index.get_level_values(0))) + + # Create approriate dataframes + current_cycle_df = df.loc[cycle] + avg_df = df.groupby(level=[1, 2]).mean() + + # Grab data + j = current_cycle_df['J'].to_numpy() + x = np.arange(len(j)) + + avg_j = avg_df['J'].to_numpy() + avg_x = np.arange(len(avg_j)) + + # Create LinePlot objects + cost_plot = LinePlot(x, j) + cost_plot.linewidth = 2 + cost_plot.label = 'Cost' + + avg_cost_plot = LinePlot(avg_x, avg_j) + avg_cost_plot.color = 'tab:red' + avg_cost_plot.linewidth = 2 + avg_cost_plot.label = f'Last {n_cycles} cycles Average' + + # Create Plot + myplot = CreatePlot() + myplot.draw_data([cost_plot, avg_cost_plot]) + myplot.set_yscale('log') + myplot.add_grid() + myplot.set_xlim(0, len(j)-1) + myplot.add_xlabel('Iterations') + myplot.add_ylabel('log (J)') + myplot.add_legend(loc='upper right', + fontsize='large') + + title = f"{config['experiment']} Cost - {config['tm']}" + myplot.add_title(title, loc='left') + myplot.add_title(cyclestr, loc='right', + fontweight='semibold') + + fig = myplot.return_figure() + + savefile = (f"{cyclestr}_{config['experiment']}_" + + f"tm0{config['tm']}_cost_function.png") + plt.savefig(outdir + savefile, bbox_inches='tight', + pad_inches=0.1) + plt.close('all') + + +def _plot_gnorm(df, config, outdir): + """ + Use date from dataframe to plot gnorm. + """ + # Get cycle info + cycle = df.index.get_level_values(0)[-1] + cyclestr = datetime.strftime(cycle, '%Y%m%d%H') + n_cycles = len(np.unique(df.index.get_level_values(0))) + + # Create approriate dataframes + current_cycle_df = df.loc[cycle] + avg_df = df.groupby(level=[1, 2]).mean() + + # Grab data + gJ = current_cycle_df['gJ'].to_numpy() + gJ = np.log(gJ/gJ[0]) + x = np.arange(len(gJ)) + + avg_gJ = avg_df['gJ'].to_numpy() + avg_gJ = np.log(avg_gJ/avg_gJ[0]) + avg_x = np.arange(len(avg_gJ)) + + # Create LinePlot objects + gnorm = LinePlot(x, gJ) + gnorm.linewidth = 2 + gnorm.label = 'gnorm' + + avg_gnorm = LinePlot(avg_x, avg_gJ) + avg_gnorm.color = 'tab:red' + avg_gnorm.linewidth = 2 + avg_gnorm.linestyle = '--' + avg_gnorm.label = f'Last {n_cycles} cycles Average' + + # Create Plot + myplot = CreatePlot() + myplot.draw_data([gnorm, avg_gnorm]) + # myplot.set_yscale('log') + myplot.add_grid() + myplot.set_xlim(0, len(gJ)-1) + myplot.add_xlabel('Iterations') + myplot.add_ylabel('log (gnorm)') + myplot.add_legend(loc='upper right', + fontsize='large') + + title = f"{config['experiment']} gnorm - {config['tm']}" + myplot.add_title(title, loc='left') + myplot.add_title(cyclestr, loc='right', + fontweight='semibold') + + fig = myplot.return_figure() + + savefile = (f"{cyclestr}_{config['experiment']}_" + + f"tm0{config['tm']}_gnorm.png") + plt.savefig(outdir + savefile, bbox_inches='tight', + pad_inches=0.1) + plt.close('all') + + +def minimization_plots(df, plotting_config, outdir): + """ + Plot minimization diagnostics including gnorm and cost function + and save them to outdir. + + Args: + df : (pandas dataframe) dataframe with appropriate information + from GSI Stat file + plotting_config : (dict) dictionary with information about + minimization period + outdir : (str) path to output diagnostics + """ + + _plot_gnorm(df, plotting_config, outdir) + _plot_cost_function(df, plotting_config, outdir) diff --git a/LAMDA/no_data_plots.py b/LAMDA/no_data_plots.py new file mode 100644 index 0000000..99f9199 --- /dev/null +++ b/LAMDA/no_data_plots.py @@ -0,0 +1,42 @@ +import numpy as np +import matplotlib.pyplot as plt +import LAMDA.plot_features as features + + +def no_data_map(plotmap, domain, metadata): + """ + Creates a plot with 'No Data' across the map if + there is no data. + + Args: + plotmap : (object) Map object created from emcpy + domain : (object) Domain object created from emcpy + metadata : (dict) metadata dictionary created from + PyGSI + Returns: + fig : (matplotlib figure) figure with 'No Data' displayed + """ + # Titles + labels = features.get_labels(metadata) + plotmap.add_title(labels['title'], loc='left', fontsize=12) + plotmap.add_title(label=labels['date title'], + loc='right', fontsize=12, + fontweight='semibold') + + # Get center of map location + lon1 = domain.extent[0]+180 + lon2 = domain.extent[1]+180 + xloc = (lon2 - ((lon2-lon1)/2)) - 180 + + lat1 = domain.extent[2]+90 + lat2 = domain.extent[3]+90 + yloc = (lat2 - (lat2-lat1)/2) - 90 + + # Plot text + plotmap.add_text(xloc, yloc, 'No Data', fontsize=32, + alpha=0.6, horizontalalignment='center') + + # Return figure + fig = plotmap.return_figure() + + return fig diff --git a/LAMDA/plot_features.py b/LAMDA/plot_features.py new file mode 100644 index 0000000..4bf574b --- /dev/null +++ b/LAMDA/plot_features.py @@ -0,0 +1,285 @@ +from datetime import datetime +import numpy as np +from textwrap import TextWrapper +import matplotlib.pyplot as plt +from emcpy.plots.plots import Scatter, Histogram, VerticalLine +from emcpy.plots.map_plots import MapScatter +from emcpy.plots import CreateMap, CreatePlot, VariableSpecs +from emcpy.plots.map_tools import Domain, MapProjection +import matplotlib +matplotlib.use('agg') + +__all__ = ['get_obs_type', 'get_bins', 'calculate_stats', + 'get_labels', 'varspecs_name'] + + +def qc_flag_colors(qcflag): + """ + Dictionary of colors to use for specific QC flag numbers. + Args: + qcflag : (int) qc flag number + Returns: + color : (str) color based on qc flag number + """ + qc_flag_colors = { + 0: 'tab:green', + 1: 'black', + 2: 'tab:pink', + 3: 'tab:red', + 4: 'tab:orange', + 5: 'tab:brown', + 6: 'tab:olive', + 7: 'skyblue', + 8: 'tab:blue', + 9: 'maroon', + 10: 'tab:cyan', + 11: 'slateblue', + 12: 'navy', + 13: 'tan', + 14: 'gold', + 15: 'darkcyan', + 50: 'indigo', + 51: 'tab:purple', + 52: 'magenta', + 53: 'cornflowerblue', + 54: 'pink', + 55: 'lightgrey', + 56: 'darkgrey', + 57: 'dimgrey' + } + + return qc_flag_colors[qcflag] + + +def get_obs_type(obs_id): + """ + Grabs the full name of a specific observation. + Args: + obs_id: (list of ints) ID number(s) for conventional + diagnostic + Returns: + list of observation names. If ID in the list, it will return, + the proper name. If no observation ID, returns 'All Observations'. + If ID not in the list, returns list of string of the ID number. + """ + + obs_indicators = { + 120: "Rawinsonde", + 126: "RASS", + 130: "Aircraft: AIREP and PIREP", + 131: "Aircraft: AMDAR", + 132: "Flight-Level Reconnaissance and Profile Dropsonde", + 133: "Aircraft: MDCRS ACARS", + 134: "Aircraft: TAMDAR", + 135: "Aircraft: Canadian AMDAR", + 153: "GPS-Integrated Precipitable Water", + 180: ("Surface Marine w/ Station Pressure (Ship, Buoy, C-MAN, " + "Tide Guage)"), + 181: "Surface Land w/ Station Pressure (Synoptic, METAR)", + 182: "Splash-Level Dropsonde Over Ocean", + 183: "Surface Marine or Land - Missing Station Pressure", + 187: "Surface Land - Missing Station Pressure", + 210: "Synthetic Tropical Cyclone", + 220: "Rawinsonde", + 221: "PIBAL", + 224: "NEXRAD Vertical Azimuth Display", + 228: "Wind Profiler: JMA", + 229: "Wind Profiler: PIBAL", + 230: "Aircraft: AIREP and PIREP", + 231: "Aircraft: AMDAR", + 232: "Flight-Level Reconnaissance and Profile Dropsonde", + 233: "Aircraft: MDCRS ACARS", + 234: "Aircraft: TAMDAR", + 235: "Aircraft: Canadian AMDAR", + 242: ("JMA IR (Longwave) and Visible Cloud Drift Below 850mb " + "(GMS, MTSAT, HIMAWARI)"), + 243: ("EUMETSAT IR (Longwave) and Visible Cloud Drift Below " + " 850mb (METEOSAT)"), + 244: "AVHRR/POES IR (Longwave) Cloud Drift", + 245: "NESDIS IR (Longwave) Cloud Drift (All Levels)", + 246: "NESDIS Imager Water Vapor (All Levels) - Cloud Top (GOES)", + 250: ("JMA Imager Water Vapor (All Levels) - Cloud Top & Deep " + "Layer (GMS, MTSAT, HIMAWARI)"), + 251: "NESDIS Visible Cloud Drift (All Levels) (GOES)", + 252: ("JMA IR (Longwave) and Visible Cloud Drift Above 850mb " + "(GMS, MTSAT, HIMAWARI)"), + 253: ("EUMETSAT IR (Longwave) and Visible Cloud Drift Above " + "850mb (METEOSAT)"), + 254: ("EUMETSAT Imager Water Vapor (All Levels) - Cloud Top " + " & Deep Layer (METEOSAT)"), + 257: ("MODIS/POES IR (Longwave) Cloud Drift (All Levels) " + "(AQUA, TERRA)"), + 258: ("MODIS/POES Imager Water Vapor (All Levels) - Cloud Top " + "(AQUA, TERRA)"), + 259: ("MODIS/POES Imager Water Vapor (All Levels) - Deep Layer " + "(AQUA, TERRA)"), + 280: ("Surface Marine w/ Station Pressure (Ship, Buoy, C-MAN, " + "Tide Guage)"), + 281: "Surface Land w/ Station Pressure (Synoptic, METAR)", + 282: "ATLAS Buoy", + 284: "Surface Marine or Land - Missing Station Pressure", + 287: "Surface Land (METAR) - Missing Station Pressure", + 289: "SUPEROBED (1.0 Lat/Lon) Scatterometer Winds over Ocean", + 290: "Non-SUPEROBED Scatterometer Winds over Ocean" + } + + descripts = list() + if obs_id is None: + return ['All Observations'] + + else: + for ids in obs_id: + if ids in obs_indicators.keys(): + descripts.append(obs_indicators[ids]) + + else: + descripts.append(str(ids)) + + return descripts + + +def get_bins(eval_type, stats): + """ + Calculates bins to use for histogram plot. + Args: + eval_type : (str) determines how to create bins + whether 'diff' or 'magnitude' + stats : (dict) dictionary of stats from + `calculate_stats()` + Returns: + bins : (array-like) + """ + binsize = stats['Std']/5 + + if eval_type == 'diff': + bins = np.arange(0-(4*stats['Std']), + 0+(4*stats['Std']), + binsize) + + else: + bins = np.arange(stats['Mean']-(4*stats['Std']), + stats['Mean']+(4*stats['Std']), + binsize) + + return bins + + +def calculate_stats(data): + """ + Calculates n, mean, min, max, + standard deviation, and RMSE. + Returns dictionary with stats. + Args: + data : (array-like) array of data to calculate + stats + Returns: + stats : (dict) dictionary of stats + """ + + n = np.count_nonzero(~np.isnan(data)) + + mean = np.nanmean(data) + std = np.nanstd(data) + mx = np.nanmax(data) + mn = np.nanmin(data) + + rmse = np.sqrt(np.nanmean(np.square(data))) + + stats = {'Nobs': n, + 'Min': np.round(mn, 3), + 'Max': np.round(mx, 3), + 'Mean': np.round(mean, 3), + 'Std': np.round(std, 3), + 'RMSE': np.round(rmse, 3) + } + + return stats + + +def get_labels(metadata): + """ + Creates a dictionary of title, date title, and the save + file name using information from the metadata. + Args: + metadata : (dict) metadata dictionary created by diags.py + Returns: + labels : (dict) dictionary of labels + """ + + var = metadata['Satellite'] if 'Satellite' in metadata \ + else metadata['Variable'] + + # Get title and save file name + if metadata['Anl Use Type'] is not None: + title = (f"{metadata['Obs Type']}: {var} - {metadata['Diag Type']}" + f" - Data {metadata['Anl Use Type']}") + + save_file = (f"{metadata['Date']:%Y%m%d%H}_{metadata['Obs Type']}_" + f"{var}_{metadata['Diag Type']}_" + f"{metadata['Anl Use Type']}_") + + else: + title = f"{metadata['Obs Type']}: {var} - {metadata['Diag Type']}" + save_file = (f"{metadata['Date']:%Y%m%d%H}_{metadata['Obs Type']}_" + f"{var}_{metadata['Diag Type']}_") + + # Adds on specific obsid, channel, or layer info to title/save file + if metadata['Diag File Type'] == 'conventional': + title = title + '\n%s' % '\n'.join(metadata['ObsID Name']) + save_file = save_file + '%s' % '_'.join( + str(x) for x in metadata['ObsID']) + + elif metadata['Diag File Type'] == 'radiance': + if metadata['Channels'] == 'All Channels': + title = title + '\nAll Channels' + save_file = save_file + '_All_Channels' + + else: + title = title + '\nChannels: %s' % ', '.join( + str(x) for x in metadata['Channels']) + save_file = save_file + 'channels_%s' % '_'.join( + str(x) for x in metadata['Channels']) + + else: + layer = metadata['Layer'] + title = title + f'\nLayer: {layer}' + save_file = save_file + '%s' % layer + + # Get date label + date_title = metadata['Date'].strftime("%d %b %Y %Hz") + + labels = { + 'title': title, + 'date title': date_title, + 'save file': save_file + } + + return labels + + +def varspecs_name(variable): + """ + Grabs the specific variable name to utlitize emcpy's VariableSpecs. + Args: + variable : (str) variable name + Returns: + spec_variable : (str) name of spec variable + """ + vardict = { + 'temperature': ['air temperature', 'tmp', 'temp', 't'], + 'specific humidity': ['q', 'spfh'], + 'u': ['eastward wind', 'ugrd', 'zonal wind'], + 'v': ['northward wind', 'vgrd', 'meridional wind'], + 'wind speed': ['windspeed'], + 'brightness temperature': ['bt'], + 'integrated layer ozone in air': ['ozone', 'o3'] + } + + # makes lower case and replaces underscore with space + spec_variable = variable.lower().replace('_', ' ') + + for key in vardict.keys(): + spec_variable = key if spec_variable in vardict[key] \ + else spec_variable + + return spec_variable diff --git a/LAMDA/scripts/LAMDA_mapping_workflow.py b/LAMDA/scripts/LAMDA_mapping_workflow.py new file mode 100644 index 0000000..50c718d --- /dev/null +++ b/LAMDA/scripts/LAMDA_mapping_workflow.py @@ -0,0 +1,111 @@ +import yaml +import numpy as np +from multiprocessing import Pool +from functools import partial +import itertools +import argparse +from LAMDA.map_qc_flags import map_qc_flags +from LAMDA.map_departures import map_departures +from LAMDA.layer_histogram import layer_histogram + + +def create_mp_work_list(diag_inputs, plotting_config): + """ + Create working list of every possible combination of + diag inputs and plotting features. + """ + work = [] + for d in diag_inputs: + keys, values = zip(*d.items()) + plotkeys, plotvals = zip(*plotting_config.items()) + + keys = keys+plotkeys + values = values+plotvals + + work_dicts = [dict(zip(keys, v)) for v in itertools.product(*values)] + work.extend(work_dicts) + + return work + + +def plotting_func(config, diag_file, data_type='omf', + diag_type=None, outdir='./'): + """ + Takes information required to create plots using + multiprocessing. + """ + + config['diag file'] = diag_file + config['data type'] = data_type + config['diag type'] = diag_type + config['outdir'] = outdir + + plot_dict = { + 'qcflags': map_qc_flags, + 'layer_histogram': layer_histogram, + 'map departures': map_departures, + } + + plot_dict[config['plot type']](config) + + +def workflow(data_config, plotting_config, nprocs, outdir='./'): + """ + Main workflow function for LAMDA diagnostic files. + + Args: + data_config : (yaml) yaml file containing info about diag + files + plotting_config : (yaml) yaml file containing info about + plotting information i.e. plot type, + domain, projection etc. + nprocs : (int) number of processors to use for + multiprocessing + outdir : (str) path to output diagnostics + """ + + # Open config yaml files + with open(data_config, 'r') as file: + config_yaml = yaml.load(file, Loader=yaml.FullLoader) + + with open(plotting_config, 'r') as file: + plotting_yaml = yaml.load(file, Loader=yaml.FullLoader) + + diag_file = config_yaml['diagnostic']['path'] + data_type = config_yaml['diagnostic']['data type'] + + for dtype in ['conventional', 'radiance', 'ozone']: + if dtype in config_yaml['diagnostic'].keys(): + diag_type = dtype + + work = create_mp_work_list(config_yaml['diagnostic'][diag_type], + plotting_yaml['plotting']) + + # Create multiprocessing Pool + p = Pool(processes=nprocs) + p.map(partial(plotting_func, diag_file=diag_file, + data_type=data_type, diag_type=diag_type, + outdir=outdir), work) + + +if __name__ == '__main__': + # Parse command line + ap = argparse.ArgumentParser() + ap.add_argument("-n", "--nprocs", + help="Number of tasks/processors for multiprocessing", + type=int, default=1) + ap.add_argument("-d", "--data_yaml", required=True, + help="Path to yaml file with diag data") + ap.add_argument("-p", "--plotting_yaml", required=True, + help="Path to yaml file with plotting info") + ap.add_argument("-o", "--outdir", default='./', + help="Out directory where files will be saved") + + myargs = ap.parse_args() + + nprocs = myargs.nprocs + data_yaml = myargs.data_yaml + plotting_yaml = myargs.plotting_yaml + outdir = myargs.outdir + + workflow(data_yaml, plotting_yaml, nprocs, outdir) diff --git a/LAMDA/scripts/LAMDA_stats_workflow.py b/LAMDA/scripts/LAMDA_stats_workflow.py new file mode 100644 index 0000000..073c182 --- /dev/null +++ b/LAMDA/scripts/LAMDA_stats_workflow.py @@ -0,0 +1,232 @@ +import argparse +import yaml +import glob +import os +import pandas as pd +import numpy as np +import itertools +from multiprocessing import Pool +from functools import partial +from datetime import datetime +from pyGSI.gsi_stat import GSIstat + +# import the plotting scripts: +from LAMDA.minimization_plots import minimization_plots +from LAMDA.bias_rmse_timeseries import bias_rmse_timeseries +from LAMDA.bias_stddev_channel import bias_stddev_channel + + +def concatenate_dfs(files, variable, cycles, data_type): + """ + Reads in list of files and creates a concatenated + dataframe of all cycles. + + Args: + files : (list) list of files + variable : (str) data obs type i.e. t, uv, q etc. + cycles : (list) list of str cycles + data_type : (str) the input data type i.e conventional + or radiance + Returns: + concatenated_df : concatenated dataframe over cycles + """ + dfs = [] + for i, file in enumerate(files): + gstat = GSIstat(file, cycles[i]) + + if data_type == 'radiance': + df = gstat.extract_instrument('rad', variable) + else: + df = gstat.extract(variable) + + dfs.append(df) + + concatenated_df = pd.concat(dfs) + + return concatenated_df + + +def plotting(config, data_dict, outdir, data_type, ob_type, + experiment_name,): + """ + Main plotting function that gets all inputs for plotting scripts + in order which includes fits_df, plotting_config, and outdir. + + Args: + config : (list of tuples) the multiprocessing inputs + including the plot type to create and the data + inputs (either conventional (obsids, subtype, obuse) + or radiance (channel, obuse)) + data_dir : (dict) dictionary that includes fits data + outdir : (str) path to where figures should be outputted + data_type : (str) determines if data is conventional or + radiance + ob_type : (str) the observation type + experiment_name : (str) the type of experiment i.e. + FV3LAMDA, LAMDAX, etc. + """ + + data_inputs = config[0] + plot_type = config[-1] + + plotting_config = { + 'ob type': ob_type, + 'data type': data_type, + 'experiment name': experiment_name + } + + if data_type == 'conventional': + plotting_config['obsid'] = data_inputs[0] + plotting_config['subtype'] = data_inputs[1] + plotting_config['obuse'] = data_inputs[-1] + + elif data_type == 'radiance': + plotting_config['sensor'] = ob_type.split('_')[0] + plotting_config['satellite'] = ob_type.split('_')[-1] + plotting_config['channel'] = data_inputs[0] + plotting_config['obuse'] = data_inputs[-1] + + # change ob_type to just the sensor to utilize + # GSIstat extract_sensor() + ob_type = ob_type.split('_')[0] + + # Loop through t-minus hours to grab proper data and + # generate plots + for tm in np.arange(7): + plotting_config['tm'] = tm + fits_data = [] + cycles = [] + + for cycle in data_dict.keys(): + fits_data.append(data_dict[cycle]['fits'][tm]) + cycles.append(cycle) + + # Concatenate all files into one dataframe + fits_df = concatenate_dfs(fits_data, ob_type, cycles, data_type) + + plot_dict = { + 'bias rmse timeseries': bias_rmse_timeseries, + 'bias stddev channel': bias_stddev_channel + } + + plot_dict[plot_type](fits_df, plotting_config, outdir) + + +def create_minimization_plots(data_dict, experiment_name, outdir): + """ + Since it is stand alone from other plotting scripts, this + functions purpose is to generate the minimization plots. + + Args: + data_dir : (dict) dictionary that includes fits2 data + experiment_name : (str) the type of experiment i.e. + FV3LAMDA, LAMDAX, etc. + outdir : (str) path to where figures should be outputted + """ + + # Loop through t-minus hours to grab proper data and + # generate plots + for tm in np.arange(7): + fits2_data = [] + cycles = [] + + for cycle in data_dict.keys(): + fits2_data.append(data_dict[cycle]['fits2'][tm]) + cycles.append(cycle) + + # Concatenate all files into one dataframe + fits2_df = concatenate_dfs(fits2_data, 'cost', cycles, + data_type='cost') + + # Get plotting information + plotting_config = { + 'tm': tm, + 'experiment': experiment_name + } + + # Create plot by calling plotting script + minimization_plots(fits2_df, plotting_config, outdir) + + +def stats_workflow(config_yaml, nprocs, outdir): + """ + Main function that reads input yaml and sorts + GSI stat data correctly. + + Args: + config_yaml : (yaml) input yaml that includes + path to stat files directory, variables + plot types, and out directory + nprocs : (int) number of processors to use for + multiprocessing + outdir : (str) path to output diagnostics + """ + + # Open config yaml files + with open(config_yaml, 'r') as file: + config_yaml = yaml.load(file, Loader=yaml.FullLoader) + + statdir = config_yaml['stat']['stat dir'] + data_type = config_yaml['stat']['data type'] + experiment_name = config_yaml['stat']['experiment name'] + ob_type = config_yaml['stat']['ob type'] + plot_types = config_yaml['stat']['plot types'] + + # Grabs all subdirectories of stats files (in date subdirectory) + subdirs = sorted([f.path for f in os.scandir(statdir) if f.is_dir()]) + + # Create dictionary of all stats files sorted by date + data_dict = {} + + for subdir in subdirs: + cycle = subdir.split('/')[-1] + fits = glob.glob(subdir + '/*fits.*') + fits2 = glob.glob(subdir + '/*fits2.*') + + data_dict[cycle] = {} + data_dict[cycle]['fits'] = sorted(fits) + data_dict[cycle]['fits2'] = sorted(fits2) + + # minimization plots are on their own so need to separate + # remove from plot list and then call function to plot + if 'minimization' in plot_types: + plot_types.remove('minimization') + create_minimization_plots(data_dict, experiment_name, outdir) + + # Create multiprocessing lists + if data_type == 'conventional': + conv_inputs = list(zip(config_yaml['stat']['observation id'], + config_yaml['stat']['observation subtype'], + config_yaml['stat']['obuse'])) + work_list = list(itertools.product(conv_inputs, plot_types)) + + elif data_type == 'radiance': + sat_inputs = list(zip(config_yaml['stat']['channels'], + config_yaml['stat']['obuse'])) + work_list = list(itertools.product(sat_inputs, plot_types)) + + # Create multiprocessing Pool + p = Pool(processes=nprocs) + p.map(partial(plotting, data_dict=data_dict, outdir=outdir, + ob_type=ob_type, data_type=data_type, + experiment_name=experiment_name), work_list) + + +if __name__ == '__main__': + # Parse command line + ap = argparse.ArgumentParser() + ap.add_argument("-n", "--nprocs", + help="Number of tasks/processors for multiprocessing", + type=int, default=1) + ap.add_argument("-s", "--stats_yaml", required=True, + help="Path to yaml file with stats data information") + ap.add_argument("-o", "--outdir", default='./', + help="Out directory where files will be saved") + + myargs = ap.parse_args() + + nprocs = myargs.nprocs + stats_yaml = myargs.stats_yaml + outdir = myargs.outdir + + stats_workflow(stats_yaml, nprocs, outdir) diff --git a/LAMDA/scripts/create_conv_stat_yaml.py b/LAMDA/scripts/create_conv_stat_yaml.py new file mode 100644 index 0000000..83dfbf1 --- /dev/null +++ b/LAMDA/scripts/create_conv_stat_yaml.py @@ -0,0 +1,79 @@ +import argparse +import yaml +import glob +import csv +import os +import numpy as np + + +def read_convinfo(infofile): + # read in convinfo file + obtype = [] # string used in the filename + typeint = [] # integer value of observation type + subtypeint = [] # integer value of observation subtype + obuse = [] # use 1, monitor -1 + cv = open(infofile) + rdcv = csv.reader(filter(lambda row: row[0] != '!', cv)) + for row in rdcv: + try: + rowsplit = row[0].split() + obtype.append(rowsplit[0]) + typeint.append(int(rowsplit[1])) + subtypeint.append(int(rowsplit[2])) + obuse.append(int(rowsplit[3])) + except IndexError: + pass # end of file + cv.close() + return obtype, typeint, subtypeint, obuse + + +def main(config): + # call function to get lists from convinfo file + obtype, typeint, subtypeint, obuse = read_convinfo(config['convinfo']) + + unique_obstypes = ['uv', 't', 'q', 'gps', 'ps'] + + for ob in unique_obstypes: + yamlout = {'stat': {}} + + obindx = np.where(np.array(obtype) == ob) + + obsid_list = np.array(typeint)[obindx].tolist() + subtype_list = np.array(subtypeint)[obindx].tolist() + obuse_list = np.array(obuse)[obindx].tolist() + + yamlout['stat']['stat dir'] = config['statdir'] + yamlout['stat']['ob type'] = ob + yamlout['stat']['data type'] = 'conventional' + yamlout['stat']['observation id'] = obsid_list + yamlout['stat']['observation subtype'] = subtype_list + yamlout['stat']['obuse'] = obuse_list + + filetype = f'conv_{ob}_stats' + output_yamlfile = config['yaml'] + filetype + '.yaml' + + # write out the YAML + with open(output_yamlfile, 'w') as file: + yaml.dump(yamlout, file, default_flow_style=False) + print('YAML written to ' + output_yamlfile) + + +if __name__ == '__main__': + parser = argparse.ArgumentParser( + description=('Given an input convinfo GSI file and path to ', + 'GSI stat files, generate an output YAML file ', + 'for LAMDA monitoring.')) + parser.add_argument('-s', '--statdir', type=str, + help='path to GSI stat files', + required=True) + parser.add_argument('-i', '--convinfo', type=str, + help='path to GSI convinfo file', + required=True) + parser.add_argument('-y', '--yaml', type=str, + help='path to output YAML file', + required=True) + args = parser.parse_args() + + config = vars(args) + + main(config) diff --git a/LAMDA/scripts/create_rad_stat_yaml.py b/LAMDA/scripts/create_rad_stat_yaml.py new file mode 100644 index 0000000..f97c3fe --- /dev/null +++ b/LAMDA/scripts/create_rad_stat_yaml.py @@ -0,0 +1,75 @@ +import argparse +import yaml +import glob +import csv +import os +import numpy as np + + +def read_satinfo(infofile): + # read in satinfo file + sensor = [] # string used in the filename + channel = [] # integer value of channel + obuse = [] # use 1, monitor -1 + cv = open(infofile) + rdcv = csv.reader(filter(lambda row: row[0] != '!', cv)) + for row in rdcv: + try: + rowsplit = row[0].split() + sensor.append(rowsplit[0]) + channel.append(int(rowsplit[1])) + obuse.append(int(rowsplit[2])) + except IndexError: + pass # end of file + cv.close() + return sensor, channel, obuse + + +def main(config): + # call function to get lists from satinfo file + sensors, channel, obuse = read_satinfo(config['satinfo']) + + unique_sensors = np.unique(sensors) + + for sensor in unique_sensors: + yamlout = {'stat': {}} + + sensorindx = np.where(np.array(sensors) == sensor) + + channel_list = np.array(channel)[sensorindx].tolist() + obuse_list = np.array(obuse)[sensorindx].tolist() + + yamlout['stat']['stat dir'] = config['statdir'] + yamlout['stat']['ob type'] = sensor + yamlout['stat']['data type'] = 'radiance' + yamlout['stat']['channels'] = channel_list + yamlout['stat']['obuse'] = obuse_list + + filetype = f'{sensor}_stats' + output_yamlfile = config['yaml'] + filetype + '.yaml' + + # write out the YAML + with open(output_yamlfile, 'w') as file: + yaml.dump(yamlout, file, default_flow_style=False) + print('YAML written to ' + output_yamlfile) + + +if __name__ == '__main__': + parser = argparse.ArgumentParser( + description=('Given an input convinfo GSI file and path to ', + 'GSI stat files, generate an output YAML file ', + 'for LAMDA monitoring.')) + parser.add_argument('-s', '--statdir', type=str, + help='path to GSI stat files', + required=True) + parser.add_argument('-i', '--convinfo', type=str, + help='path to GSI convinfo file', + required=True) + parser.add_argument('-y', '--yaml', type=str, + help='path to output YAML file', + required=True) + args = parser.parse_args() + + config = vars(args) + + main(config) diff --git a/pyGSI/gsi_stat.py b/pyGSI/gsi_stat.py index e3a953f..ae510d9 100644 --- a/pyGSI/gsi_stat.py +++ b/pyGSI/gsi_stat.py @@ -149,18 +149,18 @@ def extract_instrument(self, obtype, instrument): tmp.append(tst) columns = ['it', 'channel', 'instrument', 'satellite', - 'nassim', 'nrej', 'oberr', 'OmF_bc', 'OmF_wobc', - 'col1', 'col2', 'col3'] + 'nassim', 'nrej', 'oberr', 'OmF_wobc', 'OmF_bc', + 'pen', 'rms', 'std'] df = pd.DataFrame(data=tmp, columns=columns) - df.drop(['col1', 'col2', 'col3'], inplace=True, axis=1) +# df.drop(['col1', 'col2', 'col3'], inplace=True, axis=1) df[['channel', 'nassim', 'nrej']] = df[[ 'channel', 'nassim', 'nrej']].astype(np.int) - df[['oberr', 'OmF_bc', 'OmF_wobc']] = df[[ - 'oberr', 'OmF_bc', 'OmF_wobc']].astype(np.float) + df[['oberr', 'OmF_wobc', 'OmF_bc', 'pen', 'rms', 'std']] = df[[ + 'oberr', 'OmF_wobc', 'OmF_bc', 'pen', 'rms', 'std']].astype(np.float) # Since iteration number is not readily available, make one lendf = len(df) - nouter = np.int(lendf / len(df['it'].unique())) + nouter = np.int(np.round(lendf / len(df['it'].unique()))) douter = np.int(lendf / nouter) it = np.zeros(lendf, dtype=int) for i in range(nouter): @@ -170,7 +170,8 @@ def extract_instrument(self, obtype, instrument): df['it'] = it df = df[['it', 'instrument', 'satellite', 'channel', - 'nassim', 'nrej', 'oberr', 'OmF_bc', 'OmF_wobc']] + 'nassim', 'nrej', 'oberr', 'OmF_wobc', 'OmF_bc', + 'pen', 'rms', 'std']] df.set_index(['it', 'instrument', 'satellite', 'channel'], inplace=True) diff --git a/ush/convinfo2yaml.py b/ush/convinfo2yaml.py index d7323a9..f6c694b 100755 --- a/ush/convinfo2yaml.py +++ b/ush/convinfo2yaml.py @@ -72,9 +72,10 @@ def main(config): continue # only process assimilated obs for now dictloop = { - 'observation id': int(itype), - 'observation subtype': int(isub), - 'analysis use': True + 'observation id': [int(itype)], + 'observation subtype': [int(isub)], + 'analysis use': [True], + 'bias correction': [True] } yamlout['diagnostic']['conventional'].append(dictloop) diff --git a/ush/ozinfo2yaml.py b/ush/ozinfo2yaml.py index 22280b8..cd8574c 100755 --- a/ush/ozinfo2yaml.py +++ b/ush/ozinfo2yaml.py @@ -76,7 +76,10 @@ def main(config): if iuse != 1 and not config['monitor']: continue # only process assimilated obs for now - dictloop = {'layer': int(ilayer)} + dictloop = { + 'layer': [int(ilayer)], + 'bias correction': [True] + } yamlout['diagnostic']['ozone'].append(dictloop) diff --git a/ush/satinfo2yaml.py b/ush/satinfo2yaml.py index 1e8e912..ce6d0bf 100755 --- a/ush/satinfo2yaml.py +++ b/ush/satinfo2yaml.py @@ -72,9 +72,10 @@ def main(config): continue # only process assimilated obs for now dictloop = { - 'channel': int(ichan), - 'qc flag': 0, - 'analysis use': True + 'channel': [int(ichan)], + 'qc flag': [0], + 'analysis use': [True], + 'bias correction': [True] } yamlout['diagnostic']['radiance'].append(dictloop)