#!/usr/bin/env python
import numpy as np
import awkward as ak
np.seterr(divide='ignore', invalid='ignore', over='ignore')
from coffea import hist, processor

import topeft.modules.object_selection as te_os
#from topcoffea.modules.corrections import get_ht_sf
from topcoffea.modules.HistEFT import HistEFT
import topcoffea.modules.eft_helper as efth
from topcoffea.modules.get_param_from_jsons import GetParam
from topcoffea.modules.paths import topcoffea_path

get_tc_param = GetParam(topcoffea_path("params/params.json"))


class AnalysisProcessor(processor.ProcessorABC):
    """Gen-level EFT analysis processor.

    Selects MC events with exactly two leptons (flavor pair ee/emu/mumu),
    exactly one central photon, and at least one clean (b-)jet, then fills
    EFT-aware histograms (HistEFT) of dilepton invariant masses, leading
    lepton/jet pt, jet multiplicity, and photon-jet deltaR.

    Not usable on data (``process`` raises for ``isData`` samples).
    """

    def __init__(self, samples, wc_names_lst=None, hist_lst=None, ecut_threshold=None, do_errors=False, do_systematics=False, split_by_lepton_flavor=False, skip_signal_regions=False, skip_control_regions=False, muonSyst='nominal', dtype=np.float32):
        # NOTE(review): several kwargs (ecut_threshold, do_systematics,
        # split_by_lepton_flavor, skip_*_regions, muonSyst) are kept for
        # interface compatibility with sibling processors but are unused here.
        self._samples = samples
        # Avoid the shared-mutable-default pitfall; [] stays the effective default
        self._wc_names_lst = [] if wc_names_lst is None else wc_names_lst
        self._dtype = dtype

        # (name, title, nbins, lo, hi) for every dense axis this processor can fill
        dense_axis_specs = [
            ("mll_fromzg_e", "invmass ee from z/gamma",        40, 0, 200),
            ("mll_fromzg_m", "invmass mm from z/gamma",        40, 0, 200),
            ("mll_fromzg_t", "invmass tautau from z/gamma",    40, 0, 200),
            ("mll",          "Invmass l0l1",                   60, 0, 600),
            ("ht",           "Scalar sum of genjet pt",        100, 0, 1000),
            ("ht_clean",     "Scalar sum of clean genjet pt",  100, 0, 1000),
            ("tops_pt",      "Pt of the sum of the tops",      50, 0, 500),
            ("l0_pt",        "Pt of the leading lepton",       50, 0, 500),
            ("j0_pt",        "Pt of the leading jet",          50, 0, 500),
            ("tX_pt",        "Pt of the t(t)X system",         40, 0, 400),
            ("njets",        "njets",                          10, 0, 10),
            ("dR_j_ph",      "deltaR between jets and photons", 100, 0, 5),
        ]
        # Create the histograms
        self._accumulator = processor.dict_accumulator({
            name: HistEFT("Events", self._wc_names_lst, hist.Cat("sample", "sample"), hist.Bin(name, title, nbins, lo, hi))
            for (name, title, nbins, lo, hi) in dense_axis_specs
        })

        # Set the list of hists to fill
        if hist_lst is None:
            # If the hist list is none, assume we want to fill all hists
            self._hist_lst = list(self._accumulator.keys())
        else:
            # Otherwise, just fill the specified subset of hists
            for hist_to_include in hist_lst:
                if hist_to_include not in self._accumulator.keys():
                    raise Exception(f"Error: Cannot specify hist \"{hist_to_include}\", it is not defined in the processor.")
            self._hist_lst = hist_lst # Which hists to fill

        # Set the booleans
        self._do_errors = do_errors # Whether to calculate and store the w**2 coefficients

        # Columns read from the input (part of the coffea processor interface).
        # Bugfix: previously never assigned, so accessing `columns` raised AttributeError.
        self._columns = []

    @property
    def accumulator(self):
        return self._accumulator

    @property
    def columns(self):
        return self._columns

    # Main function: run on a given dataset
    def process(self, events):
        """Select gen-level objects and events in one chunk of `events`, then
        fill and return the histogram accumulator.

        Raises if the sample is flagged as data.
        """

        ### Dataset parameters ###
        dataset = events.metadata["dataset"]
        isData = self._samples[dataset]["isData"]
        histAxisName = self._samples[dataset]["histAxisName"]
        year = self._samples[dataset]["year"]
        xsec = self._samples[dataset]["xsec"]
        sow = self._samples[dataset]["nSumOfWeights"]

        if isData:
            raise Exception("Error: This processor is not for data")

        ### Get gen particles collection ###
        genpart = events.GenPart
        genjet = events.GenJet

        ### Lepton object selection ###
        is_final_mask = genpart.hasFlags(["fromHardProcess","isLastCopy"])
        gen_top = ak.pad_none(genpart[is_final_mask & (abs(genpart.pdgId) == 6)], 2)

        gen_l = genpart[is_final_mask & ((abs(genpart.pdgId) == 11) | (abs(genpart.pdgId) == 13) | (abs(genpart.pdgId) == 15))]
        gen_e = genpart[is_final_mask & (abs(genpart.pdgId) == 11)]
        gen_m = genpart[is_final_mask & (abs(genpart.pdgId) == 13)]
        gen_t = genpart[is_final_mask & (abs(genpart.pdgId) == 15)]

        # Sort leptons by descending pt; pad the inclusive collection so the
        # [:,0]/[:,1] lookups below are always valid (missing slots become None)
        gen_l = gen_l[ak.argsort(gen_l.pt, axis=-1, ascending=False)]
        gen_l = ak.pad_none(gen_l, 2)
        gen_e = gen_e[ak.argsort(gen_e.pt, axis=-1, ascending=False)]
        gen_m = gen_m[ak.argsort(gen_m.pt, axis=-1, ascending=False)]
        gen_t = gen_t[ak.argsort(gen_t.pt, axis=-1, ascending=False)]

        # Leptons whose distinct parent is a Z (pdgId 23) or a photon (pdgId 22)
        gen_l_from_zg = ak.pad_none(gen_l[(gen_l.distinctParent.pdgId == 23) | (gen_l.distinctParent.pdgId == 22)], 2)
        gen_e_from_zg = ak.pad_none(gen_e[(gen_e.distinctParent.pdgId == 23) | (gen_e.distinctParent.pdgId == 22)], 2)
        gen_m_from_zg = ak.pad_none(gen_m[(gen_m.distinctParent.pdgId == 23) | (gen_m.distinctParent.pdgId == 22)], 2)
        gen_t_from_zg = ak.pad_none(gen_t[(gen_t.distinctParent.pdgId == 23) | (gen_t.distinctParent.pdgId == 22)], 2)

        # Jet object selection: kinematic cuts, then lepton cleaning
        genjet = genjet[(genjet.pt > 30) & (abs(genjet.eta) < 2.4)]
        is_clean_jet = te_os.isClean(genjet, gen_e, drmin=0.4) & te_os.isClean(genjet, gen_m, drmin=0.4) & te_os.isClean(genjet, gen_t, drmin=0.4)
        genjet_clean = genjet[is_clean_jet]
        njets = ak.num(genjet_clean)
        genbjet = genjet_clean[genjet_clean.partonFlavour == 5]
        nbjets = ak.num(genbjet)

        # Photon object selection (central, pt > 20 GeV), sorted by descending pt
        gen_ph = genpart[is_final_mask & ((abs(genpart.pdgId) == 22))]
        gen_ph = gen_ph[(gen_ph.pt > 20) & (abs(gen_ph.eta) < 1.44)]
        gen_ph = gen_ph[ak.argsort(gen_ph.pt, axis=-1, ascending=False)]
        nphoton = ak.num(gen_ph)

        # deltaR between each gen photon and its nearest clean jet; events with
        # no photon get a single -1 placeholder entry
        j_nearest_to_ph, nearest_dR_j_ph = gen_ph.nearest(genjet_clean, return_metric=True)
        nearest_dR_j_ph = ak.fill_none(ak.pad_none(nearest_dR_j_ph, 1), -1)

        ### Get dense axis variables ###
        ht = ak.sum(genjet.pt, axis=-1)
        ht_clean = ak.sum(genjet_clean.pt, axis=-1)
        tops_pt = gen_top.sum().pt

        # Pt of the t(t)X system
        tX_system = ak.concatenate([gen_top, gen_l_from_zg], axis=1)
        tX_pt = tX_system.sum().pt

        # Invmass distributions (missing pairs give None -> filled with -1)
        mll_e_from_zg = ak.fill_none((gen_e_from_zg[:,0] + gen_e_from_zg[:,1]).mass, -1)
        mll_m_from_zg = ak.fill_none((gen_m_from_zg[:,0] + gen_m_from_zg[:,1]).mass, -1)
        mll_t_from_zg = ak.fill_none((gen_t_from_zg[:,0] + gen_t_from_zg[:,1]).mass, -1)
        mll_l0l1 = ak.fill_none((gen_l[:,0] + gen_l[:,1]).mass, -1)

        # Other hist values
        l0pt = ak.fill_none(ak.firsts(gen_l.pt), -1)
        j0pt = ak.fill_none(ak.firsts(genjet.pt), -1)

        # Dictionary of dense axis values
        dense_axis_dict = {
            "mll_fromzg_e" : mll_e_from_zg,
            "mll_fromzg_m" : mll_m_from_zg,
            "mll_fromzg_t" : mll_t_from_zg,
            "mll" : mll_l0l1,
            #"ht" : ht,
            #"ht_clean" : ht_clean,
            #"tX_pt" : tX_pt,
            "l0_pt" : l0pt,
            "j0_pt" : j0pt,
            #"tops_pt" : tops_pt,
            "njets" : njets,
            # Bugfix: ak.flatten() gave one entry per photon (>=1 per event), which
            # misaligns with the per-event selection mask below whenever an event
            # has 2+ photons. Take the leading photon's dR instead; identical for
            # selected events since the selection requires exactly one photon.
            "dR_j_ph": ak.fill_none(ak.firsts(nearest_dR_j_ph), -1),
        }

        ### Get weights ###
        # Extract the EFT quadratic coefficients and optionally use them to calculate
        # the coefficients on the w**2 quartic function.
        # eft_coeffs is never Jagged so convert immediately to numpy for ease of use.
        eft_coeffs = ak.to_numpy(events["EFTfitCoefficients"]) if hasattr(events, "EFTfitCoefficients") else None
        if eft_coeffs is not None:
            # Check to see if the ordering of WCs for this sample matches what we want
            if self._samples[dataset]["WCnames"] != self._wc_names_lst:
                eft_coeffs = efth.remap_coeffs(self._samples[dataset]["WCnames"], self._wc_names_lst, eft_coeffs)
        eft_w2_coeffs = efth.calc_w2_coeffs(eft_coeffs, self._dtype) if (self._do_errors and eft_coeffs is not None) else None

        # If this is not an eft sample, get the genWeight (EFT samples carry their
        # weight in the fit coefficients, so use unit genw there)
        if eft_coeffs is None:
            genw = events["genWeight"]
        else:
            genw = np.ones_like(events["event"])
        lumi = 1000.0*get_tc_param(f"lumi_{year}")
        event_weight = lumi*xsec*genw/sow

        ### Basic event selection ###
        # (Two preliminary masks built here in an earlier revision were dead code —
        # immediately overwritten — and have been removed.)
        num_leps = (ak.num(gen_l) == 2)
        # Sum of |pdgId| of the two leading leptons: 22 = ee, 24 = e+mu, 26 = mumu.
        # Bugfix: the second term previously used gen_l[:,0] twice, so the
        # subleading lepton's flavor was never checked and 24 was unreachable.
        pdgid_sum = abs(gen_l[:,0].pdgId) + abs(gen_l[:,1].pdgId)
        os_2l = ((pdgid_sum == 22) | (pdgid_sum == 26) | (pdgid_sum == 24))
        num_ph = (nphoton == 1)
        num_jet = (njets >= 1)
        num_bjet = (nbjets >= 1)
        mll = ((gen_l[:,0] + gen_l[:,1]).mass > 20)
        pt2515 = (ak.any(gen_l[:,0:1].pt > 25.0, axis=1) & ak.any(gen_l[:,1:2].pt > 15.0, axis=1))
        event_selection_cut = (num_leps & os_2l & num_ph & num_jet & num_bjet & mll & pt2515)
        # pad_none above leaves None in the mask for events with <2 leptons;
        # treat those as failing the selection
        event_selection_cut = ak.fill_none(event_selection_cut, False)

        # Example of reweighting based on Ht
        #if "private" in histAxisName:
        #    ht_sf = get_ht_sf(ht,histAxisName)
        #    event_weight = event_weight*ht_sf

        ### Loop over the hists we want to fill ###
        hout = self.accumulator.identity()
        for dense_axis_name, dense_axis_vals in dense_axis_dict.items():
            # Respect the requested subset of histograms
            # (bugfix: self._hist_lst was validated in __init__ but never applied)
            if dense_axis_name not in self._hist_lst:
                continue

            all_cuts_mask = event_selection_cut
            dense_axis_vals_cut = dense_axis_vals[all_cuts_mask]
            event_weight_cut = event_weight[all_cuts_mask]
            eft_coeffs_cut = eft_coeffs[all_cuts_mask] if eft_coeffs is not None else None
            eft_w2_coeffs_cut = eft_w2_coeffs[all_cuts_mask] if eft_w2_coeffs is not None else None

            # Fill the histos
            axes_fill_info_dict = {
                dense_axis_name : dense_axis_vals_cut,
                "sample"        : histAxisName,
                "weight"        : event_weight_cut,
                "eft_coeff"     : eft_coeffs_cut,
                "eft_err_coeff" : eft_w2_coeffs_cut,
            }
            hout[dense_axis_name].fill(**axes_fill_info_dict)

        return hout

    def postprocess(self, accumulator):
        return accumulator