Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Xgboost ID implemented #13

Open
wants to merge 7 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
132 changes: 102 additions & 30 deletions NanoCORE/ElectronSelections.cc
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,10 @@

using namespace tas;

// Definition of the shared XGBoost reader used by the WWZ electron MVA code in this file.
// It is default-constructed (empty) here and only assigned inside WWZ::electronLoadMVA().
namespace WWZ {
std::shared_ptr<XGBoostInterface> electron_mvareader_map;
}

bool SS::electronID(int idx, SS::IDLevel id_level, int year) {
// Common (across years and ID levels) checks
if (Electron_pt().at(idx) < 7.) { return false; }
Expand Down Expand Up @@ -349,17 +353,101 @@ bool ttH::isTriggerSafeNoIso(int idx) {
return true;
}

// Creates the electron XGBoost reader and builds it from the file `fname`.
// Only the first call has an effect: when a reader already exists a warning is
// printed and the existing reader is left untouched.
void WWZ::electronLoadMVA(std::string fname)
{
    if (electron_mvareader_map != nullptr)
    {
        std::cout << "WARNING: XGBoost already loaded, but is trying to load again!" << std::endl;
        return;
    }

    std::cout << "electronLoadMVA(): Loading XGBoost binary file = " << fname << std::endl;

    // Input variable names passed to XGBoostInterface::build(); the same names are
    // matched one by one in computeElectronTopMVAScore().
    std::vector<std::string> input_names{
        "pt",
        "eta",
        "jetNDauCharged",
        "miniPFRelIso_chg",
        "miniPFRelIso_diff_all_chg", // = miniPFRelIso_all - miniPFRelIso_chg
        "jetPtRelv2",
        "jetPtRatio", // = 1/(jetRelIso+1)
        "pfRelIso03_all",
        "ak4jet:btagDeepFlavB", // B tagging discriminant score
        "sip3d",
        "log_abs_dxy",
        "log_abs_dz",
        "mvaFall17V2noIso", // electron-only input, appended after the shared list
    };

    // Passed to build() as the value standing in for a missing entry.
    float nan_marker = std::numeric_limits<float>::quiet_NaN();

    electron_mvareader_map = std::make_shared<XGBoostInterface>();
    electron_mvareader_map->build(fname, input_names, nan_marker);
}

// Evaluates the electron XGBoost reader for the electron at index `idx`.
//
// For every variable name reported by the reader, the matching per-electron
// quantity is read from the tas:: accessors (some are derived on the fly, see
// the inline notes) and stored in a name -> value map, which is then passed to
// the reader's eval().
//
// Returns the value eval() leaves in `res`. `res` starts at -999, and that
// sentinel is returned unchanged when the reader has not been loaded and
// assertions are compiled out.
float WWZ::computeElectronTopMVAScore(unsigned int idx)
{
    float res = -999;

    // The reader is empty until electronLoadMVA() has run; dereferencing it in
    // that state would be undefined behavior, so fail loudly instead.
    if (!electron_mvareader_map)
    {
        std::cerr << "WWZ::computeElectronTopMVAScore: XGBoost reader has not been loaded" << std::endl;
        std::cerr << "Have you loaded the XGBoost binary? i.e. did you call electronLoadMVA()?" << std::endl;
        assert(0);
        return res;
    }

    std::unordered_map<std::string, float> input_vars;

    auto const &vnames = electron_mvareader_map->getVariableNames();
    for (auto const &vname : vnames)
    {
        if (vname == "pt")
            input_vars[vname] = static_cast<float>(tas::Electron_pt().at(idx));
        else if (vname == "eta")
            input_vars[vname] = static_cast<float>(tas::Electron_eta().at(idx));
        else if (vname == "miniPFRelIso_diff_all_chg")
            // Difference between the "all" and "charged" mini-isolation values.
            input_vars[vname] = static_cast<float>(tas::Electron_miniPFRelIso_all().at(idx) - tas::Electron_miniPFRelIso_chg().at(idx));
        else if (vname == "jetPtRatio")
            // Derived from jetRelIso as 1 / (jetRelIso + 1).
            input_vars[vname] = static_cast<float>(1. / (tas::Electron_jetRelIso().at(idx) + 1.));
        else if (vname == "log_abs_dxy")
            // NOTE(review): evaluates to -inf when dxy is exactly 0 — confirm the model expects that.
            input_vars[vname] = static_cast<float>(std::log(std::abs(tas::Electron_dxy().at(idx))));
        else if (vname == "log_abs_dz")
            input_vars[vname] = static_cast<float>(std::log(std::abs(tas::Electron_dz().at(idx))));
        else if (vname == "sip3d")
            input_vars[vname] = static_cast<float>(tas::Electron_sip3d().at(idx));
        else if (vname == "miniPFRelIso_chg")
            input_vars[vname] = static_cast<float>(tas::Electron_miniPFRelIso_chg().at(idx));
        else if (vname == "jetPtRelv2")
            input_vars[vname] = static_cast<float>(tas::Electron_jetPtRelv2().at(idx));
        else if (vname == "jetNDauCharged")
            input_vars[vname] = static_cast<float>(tas::Electron_jetNDauCharged().at(idx));
        else if (vname == "pfRelIso03_all")
            input_vars[vname] = static_cast<float>(tas::Electron_pfRelIso03_all().at(idx));
        else if (vname == "mvaFall17V2noIso")
            input_vars[vname] = static_cast<float>(tas::Electron_mvaFall17V2noIso().at(idx));
        else if (vname == "ak4jet:btagDeepFlavB")
        {
            // A jet index of -1 is treated as "no jet" and contributes a score of 0;
            // any other index is looked up in the jet collection.
            const auto jet_idx = tas::Electron_jetIdx().at(idx);
            input_vars[vname] = (jet_idx == -1) ? 0.f : static_cast<float>(tas::Jet_btagDeepFlavB().at(jet_idx));
        }
        else
        {
            // The reader reported a variable this function does not know how to fill.
            std::cerr << "WWZ::computeElectronTopMVAScore: Input variable name " << vname << " does not match to a corresponding variable" << std::endl;
            std::cerr << "Have you loaded the XGBoost binary? i.e. did you call electronLoadMVA()?" << std::endl;
            assert(0);
        }
    }

    electron_mvareader_map->eval(input_vars, res);

    return res;
}

bool WWZ::electronID(int idx, WWZ::IDLevel id_level, int year) {
// Year-specific checks
switch (year) {
case (2016):
return WWZ::electron2016ID(idx, id_level);
return WWZ::electronRun2ID(idx);
break;
case (2017):
return WWZ::electron2017ID(idx, id_level);
return WWZ::electronRun2ID(idx);
break;
case (2018):
return WWZ::electron2018ID(idx, id_level);
return WWZ::electronRun2ID(idx);
break;
case (2022):
return WWZ::electron2022ID(idx, id_level);
Expand All @@ -371,33 +459,17 @@ bool WWZ::electronID(int idx, WWZ::IDLevel id_level, int year) {
}
}

// 2016 electron selection: true only when every cut below passes.
// `id_level` is accepted but not consulted by this function.
// The cuts are chained with && so they are evaluated in the listed order and
// evaluation stops at the first failing one.
bool WWZ::electron2016ID(int idx, WWZ::IDLevel id_level) {
    return fabs(Electron_eta().at(idx) + Electron_deltaEtaSC().at(idx)) < 2.5 // |eta + deltaEtaSC|
        && Electron_pt().at(idx) > 10.
        && fabs(Electron_dxy().at(idx)) < 0.05
        && fabs(Electron_dz().at(idx)) < 0.1
        && fabs(Electron_sip3d().at(idx)) < 8
        && Electron_miniPFRelIso_all().at(idx) < 0.4;
}

// 2017 electron selection: true only when every cut below passes.
// `id_level` is accepted but not consulted by this function.
// The cuts are chained with && so they are evaluated in the listed order and
// evaluation stops at the first failing one.
bool WWZ::electron2017ID(int idx, WWZ::IDLevel id_level) {
    return fabs(Electron_eta().at(idx) + Electron_deltaEtaSC().at(idx)) < 2.5 // |eta + deltaEtaSC|
        && Electron_pt().at(idx) > 10.
        && fabs(Electron_dxy().at(idx)) < 0.05
        && fabs(Electron_dz().at(idx)) < 0.1
        && fabs(Electron_sip3d().at(idx)) < 8
        && Electron_miniPFRelIso_all().at(idx) < 0.4;
}

bool WWZ::electron2018ID(int idx, WWZ::IDLevel id_level) {
if (not (fabs(Electron_eta().at(idx) + Electron_deltaEtaSC().at(idx)) < 2.5)) return false;
if (not (Electron_pt().at(idx) > 10. )) return false;
if (not (fabs(Electron_dxy().at(idx)) < 0.05 )) return false;
if (not (fabs(Electron_dz().at(idx)) < 0.1 )) return false;
if (not (fabs(Electron_sip3d().at(idx)) < 8 )) return false;
if (not (Electron_miniPFRelIso_all().at(idx) < 0.4 )) return false;
// Electron selection used for the 2016, 2017 and 2018 cases of WWZ::electronID.
// Returns true only when every requirement below holds. The requirements are
// chained with && so they are checked in the listed order and checking stops at
// the first failure; the MVA score is therefore computed last, and only for
// electrons that passed everything before it.
bool WWZ::electronRun2ID(int idx) {
    return tas::Electron_pt().at(idx) > 10.
        && fabs(tas::Electron_eta().at(idx)) < 2.5
        && fabs(tas::Electron_dxy().at(idx)) < 0.05
        && fabs(tas::Electron_dz().at(idx)) < 0.1
        && fabs(tas::Electron_sip3d().at(idx)) < 8
        && tas::Electron_miniPFRelIso_all().at(idx) < 0.4
        && tas::Electron_lostHits().at(idx) <= 1
        && tas::Electron_convVeto().at(idx)
        && tas::Electron_tightCharge().at(idx) == 2
        && computeElectronTopMVAScore(idx) > 0.81; // XGBoost score threshold
}

Expand Down
8 changes: 5 additions & 3 deletions NanoCORE/ElectronSelections.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
#define ELECTRONSELECTIONS_H
#include "Base.h"
#include "Nano.h"
#include "Tools/xgboost/XGBoostInterface.h"

namespace SS {
enum ElectronMVAIDLevel {
Expand Down Expand Up @@ -31,10 +32,11 @@ namespace ttH {
}

// WWZ electron selection interface.
namespace WWZ {
// Shared XGBoost reader for the electron MVA; declared here, defined in ElectronSelections.cc.
extern std::shared_ptr<XGBoostInterface> electron_mvareader_map;
// Builds the reader from the file `fname`; a repeated call only prints a warning.
void electronLoadMVA(std::string fname);
// Returns the reader's score for the electron at index `idx`.
float computeElectronTopMVAScore(unsigned int idx);
// Dispatches on `year` to one of the per-year selections below.
bool electronID(int idx, WWZ::IDLevel id_level, int year);
bool electron2016ID(int idx, WWZ::IDLevel id_level);
bool electron2017ID(int idx, WWZ::IDLevel id_level);
bool electron2018ID(int idx, WWZ::IDLevel id_level);
// Selection that includes a cut on computeElectronTopMVAScore().
bool electronRun2ID(int idx);
bool electron2022ID(int idx, WWZ::IDLevel id_level);
}

Expand Down
11 changes: 9 additions & 2 deletions NanoCORE/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,19 @@ SOURCES=$(wildcard *.cc) $(wildcard Tools/*.cc) $(wildcard Tools/btagsf/*.cc) $(
OBJECTS=$(SOURCES:.cc=.o)
LIB=NANO_CORE.so

# XGBOOST essentials
# All paths are derived from XGBOOST_PATH, which this Makefile does not set
# (presumably provided by the environment setup -- confirm).
# Library directory: $(XGBOOST_PATH)/lib/
XGBOOSTLIBDIR = ${XGBOOST_PATH}/lib/
# Header directories, located relative to the library directory.
XGBOOSTINCDIR = ${XGBOOSTLIBDIR}../include/
RABITINCDIR = ${XGBOOSTLIBDIR}../rabit/include/
# Flags added to every .cc -> .o compile (include paths plus the library search path).
XGBOOSTCXXFLAGS = -I$(XGBOOSTINCDIR) -I$(RABITINCDIR) -L$(XGBOOSTLIBDIR)
# Flags added when linking $(LIB).
XGBOOSTLIBS = -lxgboost -L$(XGBOOSTLIBDIR)

$(LIB): $(OBJECTS)
$(LD) $(LDFLAGS) $(SOFLAGS) $(OBJECTS) $(ROOTLIBS) -lTMVA -lEG -lGenVector -lXMLIO -lMLP -lTreePlayer -o $@
$(LD) $(LDFLAGS) $(SOFLAGS) $(OBJECTS) $(XGBOOSTLIBS) $(ROOTLIBS) -lTMVA -lEG -lGenVector -lXMLIO -lMLP -lTreePlayer -o $@
ln -sf $(LIB) lib$(LIB)

%.o: %.cc
$(CXX) $(CXXFLAGS) -I${CMSSW_BASE}/../../../external/boost/1.67.0/include -I${CMSSW_BASE}/src -c $< -o $@ -fno-var-tracking
$(CXX) $(CXXFLAGS) $(XGBOOSTCXXFLAGS) -I${CMSSW_BASE}/../../../external/boost/1.67.0/include -I${CMSSW_BASE}/src -c $< -o $@ -fno-var-tracking

test: all
python Tools/unit_tests/tests.py
Expand Down
133 changes: 100 additions & 33 deletions NanoCORE/MuonSelections.cc
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,10 @@

using namespace tas;

// Definition of the shared XGBoost reader used by the WWZ muon MVA code in this file.
// It is default-constructed (empty) here and only assigned inside WWZ::muonLoadMVA().
namespace WWZ {
std::shared_ptr<XGBoostInterface> muon_mvareader_map;
}

bool SS::muonID(unsigned int idx, SS::IDLevel id_level, int year) {
// Common (across years and ID levels) checks
if (Muon_pt().at(idx) < 5.) { return false; }
Expand Down Expand Up @@ -131,17 +135,100 @@ bool ttH::muonID(unsigned int idx, ttH::IDLevel id_level, int year) {
return true;
}

// Creates the muon XGBoost reader and builds it from the file `fname`.
// Only the first call has an effect: when a reader already exists a warning is
// printed and the existing reader is left untouched.
void WWZ::muonLoadMVA(std::string fname)
{
    if (muon_mvareader_map != nullptr)
    {
        std::cout << "WARNING: XGBoost already loaded, but is trying to load again!" << std::endl;
        return;
    }

    std::cout << "muonLoadMVA(): Loading XGBoost binary file = " << fname << std::endl;

    // Input variable names passed to XGBoostInterface::build(); the same names are
    // matched one by one in computeMuonTopMVAScore().
    std::vector<std::string> input_names{
        "pt",
        "eta",
        "jetNDauCharged",
        "miniPFRelIso_chg",
        "miniPFRelIso_diff_all_chg", // = miniPFRelIso_all - miniPFRelIso_chg
        "jetPtRelv2",
        "jetPtRatio", // = 1/(jetRelIso+1)
        "pfRelIso03_all",
        "ak4jet:btagDeepFlavB", // B tagging discriminant score
        "sip3d",
        "log_abs_dxy",
        "log_abs_dz",
        "segmentComp",
    };

    // Passed to build() as the value standing in for a missing entry.
    float nan_marker = std::numeric_limits<float>::quiet_NaN();

    muon_mvareader_map = std::make_shared<XGBoostInterface>();
    muon_mvareader_map->build(fname, input_names, nan_marker);
}

// Evaluates the muon XGBoost reader for the muon at index `idx`.
//
// For every variable name reported by the reader, the matching per-muon
// quantity is read from the tas:: accessors (some are derived on the fly, see
// the inline notes) and stored in a name -> value map, which is then passed to
// the reader's eval().
//
// Returns the value eval() leaves in `res`. `res` starts at -999, and that
// sentinel is returned unchanged when the reader has not been loaded and
// assertions are compiled out.
float WWZ::computeMuonTopMVAScore(unsigned int idx)
{
    float res = -999;

    // The reader is empty until muonLoadMVA() has run; dereferencing it in that
    // state would be undefined behavior, so fail loudly instead.
    if (!muon_mvareader_map)
    {
        std::cerr << "WWZ::computeMuonTopMVAScore: XGBoost reader has not been loaded" << std::endl;
        std::cerr << "Have you loaded the XGBoost binary? i.e. did you call muonLoadMVA()?" << std::endl;
        assert(0);
        return res;
    }

    std::unordered_map<std::string, float> input_vars;

    auto const &vnames = muon_mvareader_map->getVariableNames();
    for (auto const &vname : vnames)
    {
        if (vname == "pt")
            input_vars[vname] = static_cast<float>(tas::Muon_pt().at(idx));
        else if (vname == "eta")
            input_vars[vname] = static_cast<float>(tas::Muon_eta().at(idx));
        else if (vname == "miniPFRelIso_diff_all_chg")
            // Difference between the "all" and "charged" mini-isolation values.
            input_vars[vname] = static_cast<float>(tas::Muon_miniPFRelIso_all().at(idx) - tas::Muon_miniPFRelIso_chg().at(idx));
        else if (vname == "jetPtRatio")
            // Derived from jetRelIso as 1 / (jetRelIso + 1).
            input_vars[vname] = static_cast<float>(1. / (tas::Muon_jetRelIso().at(idx) + 1.));
        else if (vname == "log_abs_dxy")
            // NOTE(review): evaluates to -inf when dxy is exactly 0 — confirm the model expects that.
            input_vars[vname] = static_cast<float>(std::log(std::abs(tas::Muon_dxy().at(idx))));
        else if (vname == "log_abs_dz")
            input_vars[vname] = static_cast<float>(std::log(std::abs(tas::Muon_dz().at(idx))));
        else if (vname == "sip3d")
            input_vars[vname] = static_cast<float>(tas::Muon_sip3d().at(idx));
        else if (vname == "segmentComp")
            input_vars[vname] = static_cast<float>(tas::Muon_segmentComp().at(idx));
        else if (vname == "miniPFRelIso_chg")
            input_vars[vname] = static_cast<float>(tas::Muon_miniPFRelIso_chg().at(idx));
        else if (vname == "jetPtRelv2")
            input_vars[vname] = static_cast<float>(tas::Muon_jetPtRelv2().at(idx));
        else if (vname == "jetNDauCharged")
            input_vars[vname] = static_cast<float>(tas::Muon_jetNDauCharged().at(idx));
        else if (vname == "pfRelIso03_all")
            input_vars[vname] = static_cast<float>(tas::Muon_pfRelIso03_all().at(idx));
        else if (vname == "ak4jet:btagDeepFlavB")
        {
            // A jet index of -1 is treated as "no jet" and contributes a score of 0;
            // any other index is looked up in the jet collection.
            const auto jet_idx = tas::Muon_jetIdx().at(idx);
            input_vars[vname] = (jet_idx == -1) ? 0.f : static_cast<float>(tas::Jet_btagDeepFlavB().at(jet_idx));
        }
        else
        {
            // The reader reported a variable this function does not know how to fill.
            std::cerr << "WWZ::computeMuonTopMVAScore: Input variable name " << vname << " does not match to a corresponding variable" << std::endl;
            std::cerr << "Have you loaded the XGBoost binary? i.e. did you call muonLoadMVA()?" << std::endl;
            assert(0);
        }
    }

    muon_mvareader_map->eval(input_vars, res);

    return res;
}

bool WWZ::muonID(int idx, WWZ::IDLevel id_level, int year) {
// Year-specific checks
switch (year) {
case (2016):
return WWZ::muon2016ID(idx, id_level);
return WWZ::muonRun2ID(idx);
break;
case (2017):
return WWZ::muon2017ID(idx, id_level);
return WWZ::muonRun2ID(idx);
break;
case (2018):
return WWZ::muon2018ID(idx, id_level);
return WWZ::muonRun2ID(idx);
break;
case (2022):
return WWZ::muon2022ID(idx, id_level);
Expand All @@ -153,36 +240,16 @@ bool WWZ::muonID(int idx, WWZ::IDLevel id_level, int year) {
}
}

// 2016 muon selection: true only when every cut below passes.
// `id_level` is accepted but not consulted by this function.
// The cuts are chained with && so they are evaluated in the listed order and
// evaluation stops at the first failing one.
bool WWZ::muon2016ID(unsigned int idx, WWZ::IDLevel id_level) {
    return Muon_pt().at(idx) > 10.
        && fabs(Muon_eta().at(idx)) < 2.4
        && fabs(Muon_dxy().at(idx)) < 0.05
        && fabs(Muon_dz().at(idx)) < 0.1
        && fabs(Muon_sip3d().at(idx)) < 8
        && Muon_miniPFRelIso_all().at(idx) < 0.4
        && Muon_looseId().at(idx);
}

// 2017 muon selection: true only when every cut below passes.
// `id_level` is accepted but not consulted by this function.
// The cuts are chained with && so they are evaluated in the listed order and
// evaluation stops at the first failing one.
bool WWZ::muon2017ID(unsigned int idx, WWZ::IDLevel id_level) {
    return Muon_pt().at(idx) > 10.
        && fabs(Muon_eta().at(idx)) < 2.4
        && fabs(Muon_dxy().at(idx)) < 0.05
        && fabs(Muon_dz().at(idx)) < 0.1
        && fabs(Muon_sip3d().at(idx)) < 8
        && Muon_miniPFRelIso_all().at(idx) < 0.4
        && Muon_looseId().at(idx);
}

bool WWZ::muon2018ID(unsigned int idx, WWZ::IDLevel id_level) {
if (not (Muon_pt().at(idx) > 10. )) return false;
if (not (fabs(Muon_eta().at(idx)) < 2.4 )) return false;
if (not (fabs(Muon_dxy().at(idx)) < 0.05 )) return false;
if (not (fabs(Muon_dz().at(idx)) < 0.1 )) return false;
if (not (fabs(Muon_sip3d().at(idx)) < 8 )) return false;
if (not (Muon_miniPFRelIso_all().at(idx) < 0.4 )) return false;
if (not (Muon_looseId().at(idx) )) return false;
// Muon selection used for the 2016, 2017 and 2018 cases of WWZ::muonID.
// Returns true only when every requirement below holds. The requirements are
// chained with && so they are checked in the listed order and checking stops at
// the first failure; the MVA score is therefore computed last, and only for
// muons that passed everything before it.
bool WWZ::muonRun2ID(unsigned int idx) {
    return tas::Muon_pt().at(idx) > 10.
        && fabs(tas::Muon_eta().at(idx)) < 2.4
        && fabs(tas::Muon_dxy().at(idx)) < 0.05
        && fabs(tas::Muon_dz().at(idx)) < 0.1
        && fabs(tas::Muon_sip3d().at(idx)) < 8
        && tas::Muon_miniPFRelIso_all().at(idx) < 0.4
        && tas::Muon_mediumId().at(idx)
        && computeMuonTopMVAScore(idx) > 0.64; // XGBoost score threshold
}

Expand Down
8 changes: 5 additions & 3 deletions NanoCORE/MuonSelections.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
#define MUONSELECTIONS_H
#include "Nano.h"
#include "Base.h"
#include "Tools/xgboost/XGBoostInterface.h"

namespace SS {
bool muonID(unsigned int idx, SS::IDLevel id_level, int year);
Expand All @@ -15,10 +16,11 @@ namespace ttH {
}

// WWZ muon selection interface.
namespace WWZ {
// Shared XGBoost reader for the muon MVA; declared here, defined in MuonSelections.cc.
extern std::shared_ptr<XGBoostInterface> muon_mvareader_map;
// Builds the reader from the file `fname`; a repeated call only prints a warning.
void muonLoadMVA(std::string fname);
// Returns the reader's score for the muon at index `idx`.
float computeMuonTopMVAScore(unsigned int idx);
// Dispatches on `year` to one of the per-year selections below.
bool muonID(int idx, WWZ::IDLevel id_level, int year);
bool muon2016ID(unsigned int idx, WWZ::IDLevel id_level);
bool muon2017ID(unsigned int idx, WWZ::IDLevel id_level);
bool muon2018ID(unsigned int idx, WWZ::IDLevel id_level);
// Selection that includes a cut on computeMuonTopMVAScore().
bool muonRun2ID(unsigned int idx);
bool muon2022ID(unsigned int idx, WWZ::IDLevel id_level);
}

Expand Down
Loading
Loading