diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index f5f7b7c5..1a6202d9 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -125,62 +125,64 @@ get_data: - runner_system_failure include: - - local: 'benchmarks/backgrounds/config.yml' - - local: 'benchmarks/backwards_ecal/config.yml' - - local: 'benchmarks/calo_pid/config.yml' - - local: 'benchmarks/ecal_gaps/config.yml' - - local: 'benchmarks/tracking_detectors/config.yml' - - local: 'benchmarks/tracking_performances/config.yml' - - local: 'benchmarks/tracking_performances_dis/config.yml' - - local: 'benchmarks/barrel_ecal/config.yml' - - local: 'benchmarks/barrel_hcal/config.yml' - - local: 'benchmarks/lfhcal/config.yml' - - local: 'benchmarks/zdc/config.yml' - - local: 'benchmarks/zdc_lyso/config.yml' - - local: 'benchmarks/zdc_neutron/config.yml' - - local: 'benchmarks/zdc_photon/config.yml' - - local: 'benchmarks/zdc_pi0/config.yml' - - local: 'benchmarks/material_scan/config.yml' - - local: 'benchmarks/pid/config.yml' - - local: 'benchmarks/timing/config.yml' - - local: 'benchmarks/b0_tracker/config.yml' - - local: 'benchmarks/insert_muon/config.yml' - - local: 'benchmarks/insert_tau/config.yml' - - local: 'benchmarks/zdc_sigma/config.yml' - - local: 'benchmarks/zdc_lambda/config.yml' - - local: 'benchmarks/insert_neutron/config.yml' - - local: 'benchmarks/femc_electron/config.yml' - - local: 'benchmarks/femc_photon/config.yml' - - local: 'benchmarks/femc_pi0/config.yml' + # - local: 'benchmarks/backgrounds/config.yml' + # - local: 'benchmarks/backwards_ecal/config.yml' + # - local: 'benchmarks/calo_pid/config.yml' + # - local: 'benchmarks/ecal_gaps/config.yml' + # - local: 'benchmarks/tracking_detectors/config.yml' + # - local: 'benchmarks/tracking_performances/config.yml' + # - local: 'benchmarks/tracking_performances_dis/config.yml' + # - local: 'benchmarks/barrel_ecal/config.yml' + # - local: 'benchmarks/barrel_hcal/config.yml' + # - local: 'benchmarks/lfhcal/config.yml' + # - local: 'benchmarks/zdc/config.yml' + # - local: 'benchmarks/zdc_lyso/config.yml' + # - local: 'benchmarks/zdc_neutron/config.yml' + # - local: 'benchmarks/zdc_photon/config.yml' + # - local: 'benchmarks/zdc_pi0/config.yml' + # - local: 'benchmarks/material_scan/config.yml' + # - local: 'benchmarks/pid/config.yml' + # - local: 'benchmarks/timing/config.yml' + # - local: 'benchmarks/b0_tracker/config.yml' + # - local: 'benchmarks/insert_muon/config.yml' + # - local: 'benchmarks/insert_tau/config.yml' + # - local: 'benchmarks/zdc_sigma/config.yml' + # - local: 'benchmarks/zdc_lambda/config.yml' + # - local: 'benchmarks/insert_neutron/config.yml' + # - local: 'benchmarks/femc_electron/config.yml' + # - local: 'benchmarks/femc_photon/config.yml' + # - local: 'benchmarks/femc_pi0/config.yml' + - local: 'benchmarks/lowq2/reconstruction_training/config.yml' deploy_results: allow_failure: true stage: deploy needs: - - "collect_results:backgrounds" - - "collect_results:backwards_ecal" - - "collect_results:barrel_ecal" - - "collect_results:barrel_hcal" - - "collect_results:calo_pid" - - "collect_results:ecal_gaps" - - "collect_results:lfhcal" - - "collect_results:material_scan" - - "collect_results:pid" - - "collect_results:tracking_performance" - - "collect_results:tracking_performance_campaigns" - - "collect_results:zdc_sigma" - - "collect_results:zdc_lambda" - - "collect_results:insert_neutron" - - "collect_results:tracking_performances_dis" - - "collect_results:zdc" - - "collect_results:zdc_lyso" - - "collect_results:zdc_neutron" - - "collect_results:insert_muon" - - "collect_results:insert_tau" - - "collect_results:zdc_photon" - - "collect_results:zdc_pi0" - - "collect_results:femc_electron" - - "collect_results:femc_photon" - - "collect_results:femc_pi0" + # - "collect_results:backgrounds" + # - "collect_results:backwards_ecal" + # - "collect_results:barrel_ecal" + # - "collect_results:barrel_hcal" + # - "collect_results:calo_pid" + # - "collect_results:ecal_gaps" + # - "collect_results:lfhcal" + # - "collect_results:material_scan" + # - "collect_results:pid" + # - "collect_results:tracking_performance" + # - "collect_results:tracking_performance_campaigns" + # - "collect_results:zdc_sigma" + # - "collect_results:zdc_lambda" + # - "collect_results:insert_neutron" + # - "collect_results:tracking_performances_dis" + # - "collect_results:zdc" + # - "collect_results:zdc_lyso" + # - "collect_results:zdc_neutron" + # - "collect_results:insert_muon" + # - "collect_results:insert_tau" + # - "collect_results:zdc_photon" + # - "collect_results:zdc_pi0" + # - "collect_results:femc_electron" + # - "collect_results:femc_photon" + # - "collect_results:femc_pi0" + - "collect_results:lowq2_reconstruction_training" script: - snakemake $SNAKEMAKE_FLAGS --cores 1 results/metadata.json - find results -print | sort | tee summary.txt diff --git a/benchmarks/lowq2/reconstruction_training/Snakefile b/benchmarks/lowq2/reconstruction_training/Snakefile index 1792b772..fcabdf44 100644 --- a/benchmarks/lowq2/reconstruction_training/Snakefile +++ b/benchmarks/lowq2/reconstruction_training/Snakefile @@ -1,11 +1,17 @@ +import os + +# Get the LOWQ2_DATADIR environment variable, or use a default value if it is not set +outputdir = os.getenv("LOWQ2_DATADIR", "") +resultsdir = os.getenv("LOWQ2_RESULTSDIR", "") + # Creates or copies the feature and target tensors for the lowq2 reconstruction training into a new file. rule lowq2_tensor_recon: output: - "tensors.eicrecon.tree.edm4eic.root", + f"{outputdir}tensors.eicrecon.tree.edm4eic.root", log: - "tensors.eicrecon.tree.edm4eic.root.log", - params: - input=expand("root://dtn-eic.jlab.org//work/eic2/EPIC/RECO/24.12.0/epic_craterlake/SIDIS/pythia6-eic/1.0.0/10x100/q2_0to1/pythia_ep_noradcor_10x100_q2_0.000000001_1.0_run{run}.ab.{number:04d}.eicrecon.tree.edm4eic.root", run=range(1,3), number=range(0, 20)), + f"{outputdir}tensors.eicrecon.tree.edm4eic.root.log", + params: + input=expand("root://dtn-eic.jlab.org//work/eic2/EPIC/RECO/24.12.0/epic_craterlake/SIDIS/pythia6-eic/1.0.0/10x100/q2_0to1/pythia_ep_noradcor_10x100_q2_0.000000001_1.0_run{run}.ab.{number:04d}.eicrecon.tree.edm4eic.root", run=range(1,2), number=range(0, 1)), compact_xml="epic_lowq2.xml", shell: """ @@ -18,10 +24,10 @@ rule lowq2_tensor_recon: # Trains a regression model to predict the TaggerTrackerTargetTensor from the TaggerTrackerFeatureTensor. rule lowq2_reconstruction_training: input: - data="tensors.eicrecon.tree.edm4eic.root", + data=f"{outputdir}tensors.eicrecon.tree.edm4eic.root", script="TaggerRegression.py", output: - "TestTaggerRegression.onnx", + f"{outputdir}TestTaggerTrackerTransportation.onnx", shell: """ python {input.script} --dataFiles {input.data} --outModelFile {output} @@ -30,9 +36,9 @@ rule lowq2_reconstruction_training: # Runs the inference with the default model. rule lowq2_reconstruction_inference: input: - data="tensors.eicrecon.tree.edm4eic.root", + data=f"{outputdir}tensors.eicrecon.tree.edm4eic.root", output: - "TaggerInference.eicrecon.tree.edm4eic.root", + f"{outputdir}TaggerInference.eicrecon.tree.edm4eic.root", params: compact_xml="epic_lowq2.xml", shell: @@ -43,13 +49,13 @@ rule lowq2_reconstruction_inference: -Pdd4hep:xml_files={params.compact_xml} \ """ -# Runs the inference with the new model. +# Check inference runs with the new model. rule lowq2_reconstruction_new_inference: input: - data="tensors.eicrecon.tree.edm4eic.root", - model="TestTaggerRegression.onnx", + data=f"{outputdir}tensors.eicrecon.tree.edm4eic.root", + model=f"{outputdir}TestTaggerTrackerTransportation.onnx", output: - "TaggerInferenceNew.eicrecon.tree.edm4eic.root", + f"{outputdir}TaggerInferenceNew.eicrecon.tree.edm4eic.root", params: compact_xml="epic_lowq2.xml", shell: @@ -60,4 +66,30 @@ rule lowq2_reconstruction_new_inference: -Pdd4hep:xml_files={params.compact_xml} \ -PLOWQ2:TaggerTrackerTransportationInference:modelPath={input.model} \ -Peicrecon:LogLevel=error \ + """ + +# Create plots showing the performance of a model. +rule lowq2_reconstruction_new_plot: + input: + data=f"{outputdir}tensors.eicrecon.tree.edm4eic.root", + model=lambda wildcards: f"{outputdir}{wildcards.model}.onnx", + output: + directory(expand(f"{resultsdir}{{model}}", model="{model}")), + shell: + """ + mkdir -p {output} + python TestModel.py --dataFiles {input.data} --modelFile {input.model} --outDir {output} + """ + +# Create plots showing the performance of a model. +rule lowq2_reconstruction_old_plot: + input: + data=f"{outputdir}tensors.eicrecon.tree.edm4eic.root", + model=lambda wildcards: f"calibrations/onnx/{wildcards.model}.onnx", + output: + directory(expand(f"{resultsdir}{{model}}", model="{model}")), + shell: + """ + mkdir -p {output} + python TestModel.py --dataFiles {input.data} --modelFile {input.model} --outDir {output} """ \ No newline at end of file diff --git a/benchmarks/lowq2/reconstruction_training/TaggerRegression.py b/benchmarks/lowq2/reconstruction_training/TaggerRegression.py index e0575126..e3d36ee1 100644 --- a/benchmarks/lowq2/reconstruction_training/TaggerRegression.py +++ b/benchmarks/lowq2/reconstruction_training/TaggerRegression.py @@ -38,7 +38,7 @@ print(f"Training data: {len(train_input_data)} samples") -epochs = 100 +epochs = 1000 model = trainModel(epochs, train_loader, val_loader) # Save the trained model to ONNX format diff --git a/benchmarks/lowq2/reconstruction_training/TestModel.py b/benchmarks/lowq2/reconstruction_training/TestModel.py new file mode 100644 index 00000000..dfd95931 --- /dev/null +++ b/benchmarks/lowq2/reconstruction_training/TestModel.py @@ -0,0 +1,211 @@ +import onnxruntime as ort +import argparse +import numpy as np +from ProcessData import create_arrays +import matplotlib.pyplot as plt +from matplotlib.colors import LogNorm +from scipy.stats import norm +from scipy.optimize import curve_fit + +# Parse arguments +parser = argparse.ArgumentParser(description='Train a regression model for the Tagger.') +parser.add_argument('--modelFile', type=str, default="regression_model.onnx", help='Path to the ONNX model file') +parser.add_argument('--dataFiles', type=str, nargs='+', help='Path to the data files') +parser.add_argument('--outDir', type=str, default=".", help='Output directory') +args = parser.parse_args() +modelFile = args.modelFile +dataFiles = args.dataFiles +outDir = args.outDir +outGraphFile = outDir + "/output_vs_target.png" +outGraphFile2 = outDir + "/output_vs_target2.png" +outGraphFile3 = outDir + "/output_vs_target3.png" +outGraphFile4 = outDir + "/output_vs_target4.png" + +input_data, target_data = create_arrays(dataFiles) + +target_data = np.array(target_data) + +# Load the ONNX model +session = ort.InferenceSession(modelFile) + +# Run the model on the input data +input_name = session.get_inputs()[0].name +output_name = session.get_outputs()[0].name +input_data = np.array(input_data,dtype=np.float32) +output = session.run([output_name], {input_name: input_data}) +output = np.array(output[0]) + +out_theta = np.arctan2(np.sqrt(output[:,0]**2 + output[:,1]**2),output[:,2]) +out_phi = np.arctan2(output[:,1],output[:,0]) +out_mag = np.sqrt(output[:,0]**2 + output[:,1]**2 + output[:,2]**2) +in_theta = np.arctan2(np.sqrt(target_data[:,0]**2 + target_data[:,1]**2),target_data[:,2]) +in_phi = np.arctan2(target_data[:,1],target_data[:,0]) +in_mag = np.sqrt(target_data[:,0]**2 + target_data[:,1]**2 + target_data[:,2]**2) + + +thetadiff = out_theta - in_theta +phidiff = out_phi - in_phi +# Move phidiff to within -pi/2 and pi/2 +phidiff = (phidiff + np.pi) % (2 * np.pi) - np.pi +magdiff = out_mag - in_mag + +diff = (target_data - output)/target_data +diffrange = [[-5,5],[-5,5],[-0.5,0.5]] +datarange = [[-0.02,0.02],[-0.02,0.02],[-1,0]] + +# Use the 'seismic' colormap +cmap = plt.get_cmap('seismic') + +# Creates histograms to compare the target and output data +fig, axs = plt.subplots(3, 3, figsize=(12, 12)) +for i in range(3): + # 2D histograms showing trends in the data + axs[0,i].hist2d(target_data[:,i], output[:,i], bins=100, range=[datarange[i],datarange[i]], cmap="seismic", norm=LogNorm(), label="Output vs Target") + axs[0,i].set_xlabel(f"Variable {i} Target") + axs[0,i].set_ylabel(f"Variable {i} Output") + + axs[1,i].hist(diff[:,i], bins=100, alpha=0.5, range=diffrange[i], label="Difference") + axs[1,i].set_xlabel(f"Variable {i} Difference") + axs[1,i].set_ylabel("Counts") + + axs[2,i].hist2d(target_data[:,i], diff[:,i], bins=100, range=[datarange[i],diffrange[i]], cmap="seismic", norm=LogNorm(), label="Difference vs Target") + axs[2,i].set_xlabel(f"Variable {i} Target") + axs[2,i].set_ylabel(f"Variable {i} Difference") + +plt.show() +plt.savefig(outGraphFile) + +# Creates histograms to compare theta, phi and mag target and output data +fig2, axs2 = plt.subplots(3, 3, figsize=(12, 12)) + +thetarange = [np.pi-0.01,np.pi] +phirange = [-np.pi,np.pi] +magrange = [0,1] + +thetadiffrange = [-0.01,0.01] +phidiffrange = [-np.pi,np.pi] +magdiffrange = [-0.1,0.1] + +# 2D histograms showing trends in the data +axs2[0,0].hist2d(out_theta, in_theta, bins=100, range=[thetarange,thetarange], cmap="seismic", norm=LogNorm(), label="Output vs Target") +axs2[0,0].set_xlabel("Theta Target") +axs2[0,0].set_ylabel("Theta Output") + +axs2[0,1].hist2d(out_phi, in_phi, bins=100, range=[phirange,phirange], cmap="seismic", label="Output vs Target") +axs2[0,1].set_xlabel("Phi Target") +axs2[0,1].set_ylabel("Phi Output") + +axs2[0,2].hist2d(out_mag, in_mag, bins=100, range=[magrange,magrange], cmap="seismic", norm=LogNorm(), label="Output vs Target") +axs2[0,2].set_xlabel("Mag Target") +axs2[0,2].set_ylabel("Mag Output") + +axs2[1,0].hist(thetadiff, bins=100, alpha=0.5, range=thetadiffrange, label="Difference") +axs2[1,0].set_xlabel("Theta Difference") +axs2[1,0].set_ylabel("Counts") + +axs2[1,1].hist(phidiff, bins=100, alpha=0.5, range=phidiffrange, label="Difference") +axs2[1,1].set_xlabel("Phi Difference") +axs2[1,1].set_ylabel("Counts") + +axs2[1,2].hist(magdiff, bins=100, alpha=0.5, range=magdiffrange, label="Difference") +axs2[1,2].set_xlabel("Mag Difference") +axs2[1,2].set_ylabel("Counts") + +axs2[2,0].hist2d(in_theta, thetadiff, bins=100, range=[thetarange,thetadiffrange], cmap="seismic", norm=LogNorm(), label="Difference vs Target") +axs2[2,0].set_xlabel("Theta Target") +axs2[2,0].set_ylabel("Theta Difference") + +axs2[2,1].hist2d(in_phi, phidiff, bins=100, range=[phirange,phidiffrange], cmap="seismic", label="Difference vs Target") +axs2[2,1].set_xlabel("Phi Target") +axs2[2,1].set_ylabel("Phi Difference") + +axs2[2,2].hist2d(in_mag, magdiff, bins=100, range=[magrange,magdiffrange], cmap="seismic", norm=LogNorm(), label="Difference vs Target") +axs2[2,2].set_xlabel("Mag Target") +axs2[2,2].set_ylabel("Mag Difference") + +plt.show() +plt.savefig(outGraphFile2) + +# Create histograms where the theta value has been cut at less than 3.14 +fig3, axs3 = plt.subplots(3, 3, figsize=(12, 12)) + +out_theta_cut = out_theta[out_theta < 3.14] +in_theta_cut = in_theta[out_theta < 3.14] +thetadiff_cut = thetadiff[out_theta < 3.14] + +out_phi_cut = out_phi[out_theta < 3.14] +in_phi_cut = in_phi[out_theta < 3.14] +phidiff_cut = phidiff[out_theta < 3.14] + +out_mag_cut = out_mag[out_theta < 3.14] +in_mag_cut = in_mag[out_theta < 3.14] +magdiff_cut = magdiff[out_theta < 3.14] + +axs3[0,0].hist2d(out_theta_cut, in_theta_cut, bins=100, range=[thetarange,thetarange], cmap="seismic", norm=LogNorm(), label="Output vs Target") +axs3[0,0].set_xlabel("Theta Target") +axs3[0,0].set_ylabel("Theta Output") + +axs3[0,1].hist2d(out_phi_cut, in_phi_cut, bins=100, range=[phirange,phirange], cmap="seismic", label="Output vs Target") +axs3[0,1].set_xlabel("Phi Target") +axs3[0,1].set_ylabel("Phi Output") + +axs3[0,2].hist2d(out_mag_cut, in_mag_cut, bins=100, range=[magrange,magrange], cmap="seismic", norm=LogNorm(), label="Output vs Target") +axs3[0,2].set_xlabel("Mag Target") +axs3[0,2].set_ylabel("Mag Output") + +axs3[1,0].hist(thetadiff_cut, bins=100, alpha=0.5, range=thetadiffrange, label="Difference") +axs3[1,0].set_xlabel("Theta Difference") +axs3[1,0].set_ylabel("Counts") + +axs3[1,1].hist(phidiff_cut, bins=100, alpha=0.5, range=phidiffrange, label="Difference") +axs3[1,1].set_xlabel("Phi Difference") +axs3[1,1].set_ylabel("Counts") + +axs3[1,2].hist(magdiff_cut, bins=100, alpha=0.5, range=magdiffrange, label="Difference") +axs3[1,2].set_xlabel("Mag Difference") +axs3[1,2].set_ylabel("Counts") + +axs3[2,0].hist2d(in_theta_cut, thetadiff_cut, bins=100, range=[thetarange,thetadiffrange], cmap="seismic", norm=LogNorm(), label="Difference vs Target") +axs3[2,0].set_xlabel("Theta Target") +axs3[2,0].set_ylabel("Theta Difference") + +axs3[2,1].hist2d(in_phi_cut, phidiff_cut, bins=100, range=[phirange,phidiffrange], cmap="seismic", label="Difference vs Target") +axs3[2,1].set_xlabel("Phi Target") +axs3[2,1].set_ylabel("Phi Difference") + +axs3[2,2].hist2d(in_mag_cut, magdiff_cut, bins=100, range=[magrange,magdiffrange], cmap="seismic", norm=LogNorm(), label="Difference vs Target") +axs3[2,2].set_xlabel("Mag Target") +axs3[2,2].set_ylabel("Mag Difference") + +plt.show() +plt.savefig(outGraphFile3) + +# Create plots where a Gaussian has been fitted to the data +# Function to fit a Gaussian and plot the results +def plot_gaussian_fit(ax, data, range, xlabel, ylabel): + def gaussian(x, mu, sigma, amplitude): + return amplitude * np.exp(-0.5 * ((x - mu) / sigma) ** 2) + + hist, bin_edges = np.histogram(data, bins=100, range=range, density=True) + bin_centers = (bin_edges[:-1] + bin_edges[1:]) / 2 + + popt, _ = curve_fit(gaussian, bin_centers, hist, p0=[0, 0.01, 1]) + mu, sigma, amplitude = popt + print(f"mu={mu}, sigma={sigma}, amplitude={amplitude}") + + x = np.linspace(range[0], range[1], 100) + ax.plot(x, gaussian(x, *popt), 'k', linewidth=2) + ax.set_xlabel(xlabel) + ax.set_ylabel(ylabel) + ax.hist(data, bins=100, alpha=0.5, range=range, edgecolor='black', density=True) + ax.legend([f'Fit: $\mu$={mu:.5f}, $\sigma$={sigma:.5f}']) + +# Create histograms with Gaussian fits +fig4, axs4 = plt.subplots(3, 1, figsize=(8, 12)) + +plot_gaussian_fit(axs4[0], thetadiff, thetadiffrange, "Theta Difference", "Density") +plot_gaussian_fit(axs4[1], phidiff_cut, phidiffrange, "Phi Difference", "Density") +plot_gaussian_fit(axs4[2], magdiff, magdiffrange, "Mag Difference", "Density") + +plt.show() +plt.savefig(outGraphFile4) \ No newline at end of file