import datetime import os import sys import shutil from lobster import cmssw from lobster.core import AdvancedOptions, Category, Config, Dataset,ParentDataset, StorageConfiguration, Workflow sys.path.append(os.getcwd()) from helpers.utils import regex_match, run_process MODIFIED_CFG_DIR = "python_cfgs/modified" timestamp_tag = datetime.datetime.now().strftime('%Y%m%d_%H%M') input_path = "/store/user/" input_path_full = "/hadoop" + input_path #master_label = 'EFT_CRC_postLHE_crc_{tstamp}'.format(tstamp=timestamp_tag) #master_label = 'EFT_ALL_postLHE_{tstamp}'.format(tstamp=timestamp_tag) #master_label = 'EFT_T3_postLHE_{tstamp}'.format(tstamp=timestamp_tag) master_label = 'EFT_testNAOD_T3_postLHE_{tstamp}'.format(tstamp=timestamp_tag) ########## Set up the lobster cfg ########## # Note: Should not have to modify things outside of this section, unless you want to: # - Hardcode lhe dirs to use # - Modify gen cfgs # Specify what kind of output to make #STEPS = 'throughGEN' STEPS = 'throughMAOD' #STEPS = 'throughNAOD' PATH_TO_NAOD_CMSSW = "/afs/crc.nd.edu/user/k/kmohrman/CMSSW_Releases/CMSSW_10_6_19_patch2" # Specfy the run setup #RUN_SETUP = 'full_production' #RUN_SETUP = 'mg_studies' RUN_SETUP = 'testing' # Specify the UL year #UL_YEAR = 'UL16' #UL_YEAR = 'UL16APV' #UL_YEAR = 'UL17' UL_YEAR = 'UL18' # Name the output out_ver = "v1" # The version index for the OUTPUT directory #out_tag = "FullR2Studies/ULChecks/ttXJet-tXq_testUpdateGenproddim6TopMay20GST_GEN_ULCheck" out_tag = "FullR2Studies/ValidationChecks/ttXJet_dim6TopMay20GST_run0StartPt_qCutScan_GEN_" #out_tag = "ForPhenoJhepReviewStudies/ttZJet_sampleForDoubleCheckingQcut_dim6TopMay20GST_GEN_" prod_tag = "Round1/Batch1" # Append UL year to out tag out_tag = out_tag + UL_YEAR # Only run over lhe steps from specific processes/coeffs/runs process_whitelist = [] coeff_whitelist = [] runs_whitelist = [] # (i.e. MG starting points) # Specify the input directories. Note: The workflows in each of the input directories should all be uniquely named w.r.t each other input_dirs = [ #os.path.join(input_path_full,"kmohrman/LHE_step/FullR2Studies/ULChecks/ttXJet-tXq_testUpdateGenproddim6TopMay20GST_ULCheck-UL16/v1"), #os.path.join(input_path_full,"kmohrman/LHE_step/FullR2Studies/ULChecks/ttXJet-tXq_testUpdateGenproddim6TopMay20GST_ULCheck-UL16APV/v1"), #os.path.join(input_path_full,"kmohrman/LHE_step/FullR2Studies/ULChecks/ttXJet-tXq_testUpdateGenproddim6TopMay20GST_ULCheck-UL17/v1"), #os.path.join(input_path_full,"kmohrman/LHE_step/FullR2Studies/ULChecks/ttXJet-tXq_testUpdateGenproddim6TopMay20GST_ULCheck-UL18/v1"), #os.path.join(input_path_full,"kmohrman/FullProduction/FullR2/UL17/Round1/Batch1/LHE_step/v1/"), #os.path.join(input_path_full,"kmohrman/FullProduction/FullR2/UL17/Round1/Batch2/LHE_step/v1/"), #os.path.join(input_path_full,"kmohrman/FullProduction/FullR2/UL17/Round1/Batch3/LHE_step/v1/"), os.path.join(input_path_full,"kmohrman/FullProduction/FullR2/UL18/Round1/Batch1/LHE_step/v1/"), ] ########## Select input directories according to whitelists ########## lhe_dirs = [] for path in input_dirs: for fd in os.listdir(path): if fd.find('lhe_step_') < 0: continue arr = fd.split('_') p,c,r = arr[2],arr[3],arr[4] if len(regex_match([p],process_whitelist)) == 0: continue elif len(regex_match([c],coeff_whitelist)) == 0: continue elif len(regex_match([r],runs_whitelist)) == 0: continue relpath = os.path.relpath(path,input_path_full) lhe_dirs.append(os.path.join(relpath,fd)) ''' # Hardcode the lhe dirs by hand lhe_dirs = [ # For Full R2 #"kmohrman/LHE_step/FullR2Studies/ValidationChecks/ttHJet-ttlnuJet-ttllJet-ttbarJet-tllq-tHq_dim6TopMay20GST_all22WCsStartPtCheck_UL17/v1/lhe_step_ttHJet_all22WCsStartPtCheckdim6TopMay20GST_run0", #"kmohrman/LHE_step/FullR2Studies/ValidationChecks/ttHJet-ttlnuJet-ttllJet-ttbarJet-tllq-tHq_dim6TopMay20GST_all22WCsStartPtCheck_UL17/v1/lhe_step_ttlnuJet_all22WCsStartPtCheckdim6TopMay20GST_run0", #"kmohrman/LHE_step/FullR2Studies/ValidationChecks/ttHJet-ttlnuJet-ttllJet-ttbarJet-tllq-tHq_dim6TopMay20GST_all22WCsStartPtCheck_UL17/v1/lhe_step_ttllNuNuJetNoHiggs_all22WCsStartPtCheckdim6TopMay20GST_run0", #"kmohrman/LHE_step/FullR2Studies/ValidationChecks/ttHJet-ttlnuJet-ttllJet-ttbarJet-tllq-tHq_dim6TopMay20GST_all22WCsStartPtCheck_UL17/v1/lhe_step_tllq4fNoSchanWNoHiggs0p_all22WCsStartPtCheckdim6TopMay20GST_run0", #"kmohrman/LHE_step/FullR2Studies/ValidationChecks/ttHJet-ttlnuJet-ttllJet-ttbarJet-tllq-tHq_dim6TopMay20GST_all22WCsStartPtCheck_UL17/v1/lhe_step_tHq4f_all22WCsStartPtCheckdim6TopMay20GST_run0", #"kmohrman/LHE_step/FullR2Studies/ValidationChecks/ttHJet-ttlnuJet-ttllJet-ttbarJet-tllq-tHq_dim6TopMay20GST_all22WCsStartPtCheck_UL17/v1/lhe_step_ttbarJet_all22WCsStartPtCheckdim6TopMay20GST_run0", #"kmohrman/LHE_step/FullR2Studies/ValidationChecks/ttHJet-ttlnuJet-ttbarJet-tllq-tHq_dim6TopMay20GST_all22WCsStartPtCheckV2_UL17/v1/lhe_step_tllq4fNoSchanWNoHiggs0p_all22WCsStartPtCheckV2dim6TopMay20GST_run0" "kmohrman/LHE_step/FullR2Studies/ULChecks/ttXJet-tXq_testUpdateGenproddim6TopMay20GST_ULCheck-UL17/v1/lhe_step_tllq4fNoSchanWNoHiggs0p_testUpdateGenproddim6TopMay20GST_run1" ] ''' ########## Set up output based on run setup ########## if RUN_SETUP == 'mg_studies': # For MadGraph test studies output_path = "/store/user/$USER/postLHE_step/{tag}/{ver}".format(tag=out_tag,ver=out_ver) workdir_path = "/tmpscratch/users/$USER/postLHE_step/{tag}/{ver}".format(tag=out_tag,ver=out_ver) plotdir_path = "~/www/lobster/postLHE_step/{tag}/{ver}".format(tag=out_tag,ver=out_ver) elif RUN_SETUP == 'full_production': # For Large MC production output_path = "/store/user/$USER/FullProduction/FullR2/{ul}/{tag}/postLHE_step/{ver}".format(ul=UL_YEAR,tag=prod_tag,ver=out_ver) workdir_path = "/tmpscratch/users/$USER/FullProduction/FullR2/{ul}/{tag}/postLHE_step/{ver}".format(ul=UL_YEAR,tag=prod_tag,ver=out_ver) plotdir_path = "~/www/lobster/FullProduction/FullR2/{ul}/{tag}/postLHE_step/{ver}".format(ul=UL_YEAR,tag=prod_tag,ver=out_ver) elif RUN_SETUP == 'testing': # For test runs (where you do not intend to keep the output) grp_tag = "lobster_{tstamp}".format(tstamp=timestamp_tag) output_path = "/store/user/$USER/postLHE_step/tests/{tag}/{ver}".format(tag=grp_tag,ver=out_ver) workdir_path = "/tmpscratch/users/$USER/postLHE_step/tests/{tag}/{ver}".format(tag=grp_tag,ver=out_ver) plotdir_path = "~/www/lobster/postLHE_step/tests/{tag}/{ver}".format(tag=grp_tag,ver=out_ver) else: print "Unknown run setup, {setup}".format(setup=RUN_SETUP) raise ValueError ########## Configure storage ########## storage = StorageConfiguration( input=[ "hdfs://eddie.crc.nd.edu:19000" + input_path, "root://deepthought.crc.nd.edu/" + input_path, # Note the extra slash after the hostname! "gsiftp://T3_US_NotreDame" + input_path, "srm://T3_US_NotreDame" + input_path, ], output=[ "hdfs://eddie.crc.nd.edu:19000" + output_path, # ND is not in the XrootD redirector, thus hardcode server. "root://deepthought.crc.nd.edu/" + output_path, # Note the extra slash after the hostname! "gsiftp://T3_US_NotreDame" + output_path, "srm://T3_US_NotreDame" + output_path, "file:///hadoop" + output_path, ], disable_input_streaming=False, ) ########## Resources for each step ########## # Worker Res.: # Cores: 12 | 4 # Memory: 16000 | 8000 # Disk: 13000 | 6500 gen_resources = Category( name='gen', cores=1, memory=2000, disk=1000, tasks_min=12, tasks_max=3000, mode='fixed' ) sim_resources = Category( name='sim', cores=6, memory=3000, disk=3000, tasks_min=12, mode='fixed' ) digi_resources = Category( name='digi', cores=6, memory=7800, disk=6000, mode='fixed' ) hlt_resources = Category( name='hlt', cores=2, memory=5000, disk=6000, mode='fixed' ) reco_resources = Category( name='reco', cores=3, memory=5000, disk=3000, mode='fixed' ) maod_resources = Category( name='maod', cores=2, memory=3500, disk=2000, mode='fixed' ) naod_resources = Category( name='naod', cores=2, memory=3500, disk=2000, mode='fixed' ) ########## Set up dictionary for cfg files ########## wf_steps = ['gen','sim','digi','hlt','reco','maod','naod'] ul_base = 'ul_cfgs' ul_cfg_map = { 'UL16' : { 'all_procs' : { 'sim' : os.path.join(ul_base,'UL16_SIM_cfg.py'), 'digi' : os.path.join(ul_base,'UL16_DIGI_cfg.py'), 'hlt' : os.path.join(ul_base,'UL16_HLT_cfg.py'), 'reco' : os.path.join(ul_base,'UL16_RECO_cfg.py'), 'maod' : os.path.join(ul_base,'UL16_MAOD_cfg.py'), 'naod' : os.path.join(ul_base,'UL16_NAOD_cfg.py'), } }, 'UL16APV' : { 'all_procs' : { 'sim' : os.path.join(ul_base,'UL16APV_SIM_cfg.py'), 'digi' : os.path.join(ul_base,'UL16APV_DIGI_cfg.py'), 'hlt' : os.path.join(ul_base,'UL16APV_HLT_cfg.py'), 'reco' : os.path.join(ul_base,'UL16APV_RECO_cfg.py'), 'maod' : os.path.join(ul_base,'UL16APV_MAOD_cfg.py'), 'naod' : os.path.join(ul_base,'UL16APV_NAOD_cfg.py'), } }, 'UL17' : { 'all_procs' : { 'sim' : os.path.join(ul_base,'UL17_SIM_cfg.py'), 'digi' : os.path.join(ul_base,'UL17_DIGI_cfg.py'), 'hlt' : os.path.join(ul_base,'UL17_HLT_cfg.py'), 'reco' : os.path.join(ul_base,'UL17_RECO_cfg.py'), 'maod' : os.path.join(ul_base,'UL17_MAOD_cfg.py'), 'naod' : os.path.join(ul_base,'UL17_NAOD_cfg.py'), } }, 'UL18' : { 'all_procs' : { 'sim' : os.path.join(ul_base,'UL18_SIM_cfg.py'), 'digi' : os.path.join(ul_base,'UL18_DIGI_cfg.py'), 'hlt' : os.path.join(ul_base,'UL18_HLT_cfg.py'), 'reco' : os.path.join(ul_base,'UL18_RECO_cfg.py'), 'maod' : os.path.join(ul_base,'UL18_MAOD_cfg.py'), 'naod' : os.path.join(ul_base,'UL18_NAOD_cfg.py'), } } } gen_ul_cfg_map = { 'UL16' : { 'ttHJet' : { 'gen': os.path.join(ul_base,'UL16_GEN_ttHJet_cfg.py'), }, 'ttlnuJet' : { 'gen': os.path.join(ul_base,'UL16_GEN_ttlnuJet_cfg.py'), }, 'ttllNuNuJetNoHiggs' : { 'gen': os.path.join(ul_base,'UL16_GEN_ttlnuJet_cfg.py'), }, 'tllq4fNoSchanWNoHiggs0p' : { 'gen': os.path.join(ul_base,'UL16_GEN_ttlnu_cfg.py'), }, 'tHq4f' : { 'gen': os.path.join(ul_base,'UL16_GEN_ttlnu_cfg.py'), } }, 'UL16APV' : { 'ttHJet' : { 'gen': os.path.join(ul_base,'UL16APV_GEN_ttHJet_cfg.py'), }, 'ttlnuJet' : { 'gen': os.path.join(ul_base,'UL16APV_GEN_ttlnuJet_cfg.py'), }, 'ttllNuNuJetNoHiggs' : { 'gen': os.path.join(ul_base,'UL16APV_GEN_ttlnuJet_cfg.py'), }, 'tllq4fNoSchanWNoHiggs0p' : { 'gen': os.path.join(ul_base,'UL16APV_GEN_ttlnu_cfg.py'), }, 'tHq4f' : { 'gen': os.path.join(ul_base,'UL16APV_GEN_ttlnu_cfg.py'), } }, 'UL17' : { 'ttHJet' : { 'gen': os.path.join(ul_base,'UL17_GEN_ttHJet_cfg.py'), }, 'ttlnuJet' : { 'gen': os.path.join(ul_base,'UL17_GEN_ttlnuJet_cfg.py'), }, 'ttllNuNuJetNoHiggs' : { 'gen': os.path.join(ul_base,'UL17_GEN_ttlnuJet_cfg.py'), }, 'tllq4fNoSchanWNoHiggs0p' : { 'gen': os.path.join(ul_base,'UL17_GEN_ttlnu_cfg.py'), }, 'tHq4f' : { 'gen': os.path.join(ul_base,'UL17_GEN_ttlnu_cfg.py'), } }, 'UL18' : { 'ttHJet' : { 'gen': os.path.join(ul_base,'UL18_GEN_ttHJet_cfg.py'), }, 'ttlnuJet' : { 'gen': os.path.join(ul_base,'UL18_GEN_ttlnuJet_cfg.py'), }, 'ttllNuNuJetNoHiggs' : { 'gen': os.path.join(ul_base,'UL18_GEN_ttlnuJet_cfg.py'), }, 'tllq4fNoSchanWNoHiggs0p' : { 'gen': os.path.join(ul_base,'UL18_GEN_ttlnu_cfg.py'), }, 'tHq4f' : { 'gen': os.path.join(ul_base,'UL18_GEN_ttlnu_cfg.py'), } }, } # Put the gen configs into the ul cfg map fragment_map = ul_cfg_map[UL_YEAR] for k,v in gen_ul_cfg_map[UL_YEAR].iteritems(): fragment_map[k] = v ########## Specify CMSSW rel for each step ########## rel_map = { 'UL16' : { 'gen' : 'CMSSW_10_6_19_patch3', 'sim' : 'CMSSW_10_6_17_patch1', 'digi': 'CMSSW_10_6_17_patch1', 'hlt' : 'CMSSW_8_0_33_UL', 'reco': 'CMSSW_10_6_17_patch1', 'maod': 'CMSSW_10_6_20', 'naod': 'PATH_TO_NAOD_CMSSW', }, 'UL16APV' : { 'gen' : 'CMSSW_10_6_19_patch3', 'sim' : 'CMSSW_10_6_17_patch1', 'digi': 'CMSSW_10_6_17_patch1', 'hlt' : 'CMSSW_8_0_33_UL', 'reco': 'CMSSW_10_6_17_patch1', 'maod': 'CMSSW_10_6_20', 'naod': 'PATH_TO_NAOD_CMSSW', }, 'UL17' : { 'gen' : 'CMSSW_10_6_19_patch3', 'sim' : 'CMSSW_10_6_17_patch1', 'digi': 'CMSSW_10_6_17_patch1', 'hlt' : 'CMSSW_9_4_14_UL_patch1', 'reco': 'CMSSW_10_6_17_patch1', 'maod': 'CMSSW_10_6_20', 'naod': 'PATH_TO_NAOD_CMSSW', }, 'UL18' : { 'gen' : 'CMSSW_10_6_19_patch3', 'sim' : 'CMSSW_10_6_17_patch1', 'digi': 'CMSSW_10_6_17_patch1', 'hlt' : 'CMSSW_10_2_16_UL', 'reco': 'CMSSW_10_6_17_patch1', 'maod': 'CMSSW_10_6_20', 'naod': 'PATH_TO_NAOD_CMSSW', }, } ########## Optionally modify the GEN cfgs ########## gs_mods_dict = {} gs_mods_dict["base"] = {} gs_mods_dict["base"]["base"] = [] ''' # Example of q cut variation gs_mods_dict["ttHJet"] = {} gs_mods_dict["ttHJet"]['qCut15'] = ['s|JetMatching:qCut = 20.|JetMatching:qCut = 15.|g'] gs_mods_dict["ttHJet"]['qCut20'] = ['s|JetMatching:qCut = 20.|JetMatching:qCut = 20.|g'] gs_mods_dict["ttHJet"]['qCut25'] = ['s|JetMatching:qCut = 20.|JetMatching:qCut = 25.|g'] gs_mods_dict["ttlnuJet"] = {} gs_mods_dict["ttlnuJet"]['qCut15'] = ['s|JetMatching:qCut = 20.|JetMatching:qCut = 15.|g'] gs_mods_dict["ttlnuJet"]['qCut20'] = ['s|JetMatching:qCut = 20.|JetMatching:qCut = 20.|g'] gs_mods_dict["ttlnuJet"]['qCut25'] = ['s|JetMatching:qCut = 20.|JetMatching:qCut = 25.|g'] gs_mods_dict["ttbarJet"] = {} gs_mods_dict["ttbarJet"]['qCut15'] = ['s|JetMatching:qCut = 20.|JetMatching:qCut = 15.|g'] gs_mods_dict["ttbarJet"]['qCut20'] = ['s|JetMatching:qCut = 20.|JetMatching:qCut = 20.|g'] gs_mods_dict["ttbarJet"]['qCut25'] = ['s|JetMatching:qCut = 20.|JetMatching:qCut = 25.|g'] gs_mods_dict["ttllNuNuJetNoHiggs"] = {} gs_mods_dict["ttllNuNuJetNoHiggs"]['qCut15'] = ['s|JetMatching:qCut = 20.|JetMatching:qCut = 15.|g'] gs_mods_dict["ttllNuNuJetNoHiggs"]['qCut20'] = ['s|JetMatching:qCut = 20.|JetMatching:qCut = 20.|g'] gs_mods_dict["ttllNuNuJetNoHiggs"]['qCut25'] = ['s|JetMatching:qCut = 20.|JetMatching:qCut = 25.|g'] ''' # Turn matching off here, since we are using a fragment wiht matching turned on gs_mods_dict["ttH"] = {} gs_mods_dict["ttH"]["MatchOff"] = ['s|JetMatching:merge = on|JetMatching:merge = off|g'] gs_mods_dict["ttHTOll"] = {} gs_mods_dict["ttHTOll"]["MatchOff"] = ['s|JetMatching:merge = on|JetMatching:merge = off|g'] ########## Generate workflows ########## wf = [] print "Generating workflows:" for idx,lhe_dir in enumerate(lhe_dirs): # Raise exception if trying to make UL sample but the UL year is not in the path anywhere if ( (UL_YEAR not in lhe_dir) or ((UL_YEAR == "UL16") and ("APV" in lhe_dir)) ): print "\nWARNING: UL year selected, but lhe dir path does not contain this UL year in it anywhere, are you sure you have the right path? Please double check." print "\tUL Year:" , UL_YEAR, "\n\tPath:" , lhe_dir, "\nExiting...\n" raise Exception print "\t[{0}/{1}] LHE Input: {dir}".format(idx+1,len(lhe_dirs),dir=lhe_dir) head,tail = os.path.split(lhe_dir) arr = tail.split('_') p,c,r = arr[2],arr[3],arr[4] #print("p c r:",p,c,r) if p in gs_mods_dict: gs_mods = gs_mods_dict[p] else: gs_mods = gs_mods_dict["base"] for mod_tag,sed_str_list in gs_mods.iteritems(): wf_fragments = {} for step in wf_steps: if step == 'gen': if (p=="ttH" or p=="ttHTOll"): # We don't have a ttH UL config, but can just use ttHJet if we turn off matching template_loc = fragment_map["ttHJet"][step] elif (p=="ttll" or p=="ttllNoHiggs" or p=="ttW"): # We don't have a ttll config, but can just use the ttlnu one (since matching already off) template_loc = fragment_map["tllq4fNoSchanWNoHiggs0p"][step] elif (p=="tHTOllq4fNoSchanW" or p=="tllq4fNoSchanW"): # We don't have thest singe top configs but just use tllnu one (since matching already off) template_loc = fragment_map["tllq4fNoSchanWNoHiggs0p"][step] elif (p=="ttWJet" or p=="ttZJet" or p=="ttbarJet"): template_loc = fragment_map["ttlnuJet"][step] else: template_loc = fragment_map[p][step] else: template_loc = fragment_map["all_procs"][step] # Only the GEN step can be modified if step == 'gen': head,tail = os.path.split(template_loc) # This should be a unique identifier within a single lobster master to ensure we dont overwrite a cfg file too early cfg_tag = '{tag}-{idx}'.format(tag=mod_tag,idx=idx) tail = tail.replace("cfg.py","{tag}_cfg.py".format(tag=cfg_tag)) mod_loc = os.path.join(MODIFIED_CFG_DIR,tail) shutil.copy(template_loc,mod_loc) for sed_str in sed_str_list: if sed_str: run_process(['sed','-i','-e',sed_str,mod_loc]) else: mod_loc = template_loc wf_fragments[step] = mod_loc if mod_tag == 'base': mod_tag = '' label_tag = "{p}_{c}{mod}_{r}".format(p=p,c=c,r=r,mod=mod_tag) gen = Workflow( label='gen_step_{tag}'.format(tag=label_tag), command='cmsRun {cfg}'.format(cfg=wf_fragments['gen']), sandbox=cmssw.Sandbox(release=rel_map[UL_YEAR]['gen']), merge_size=-1, # Don't merge files we don't plan to keep cleanup_input=False, # Do not accidently clean up the LHE files!!! globaltag=False, outputs=['GEN-00000.root'], dataset=Dataset( files=lhe_dir, files_per_task=1, patterns=["*.root"] ), category=gen_resources ) sim = Workflow( label='sim_step_{tag}'.format(tag=label_tag), command='cmsRun {cfg}'.format(cfg=wf_fragments['sim']), sandbox=cmssw.Sandbox(release=rel_map[UL_YEAR]['sim']), merge_size=-1, # Don't merge files we don't plan to keep cleanup_input=True, #cleanup_input=False, globaltag=False, outputs=['SIM-00000.root'], dataset=ParentDataset( parent=gen, units_per_task=1 ), category=sim_resources ) digi = Workflow( label='digi_step_{tag}'.format(tag=label_tag), command='cmsRun {cfg}'.format(cfg=wf_fragments['digi']), sandbox=cmssw.Sandbox(release=rel_map[UL_YEAR]['digi']), merge_size=-1, # Don't merge files we don't plan to keep cleanup_input=True, #cleanup_input=False, outputs=['DIGI-00000.root'], dataset=ParentDataset( parent=sim, units_per_task=1 ), category=digi_resources ) hlt = Workflow( label='hlt_step_{tag}'.format(tag=label_tag), command='cmsRun {cfg}'.format(cfg=wf_fragments['hlt']), sandbox=cmssw.Sandbox(release=rel_map[UL_YEAR]['hlt']), merge_size=-1, # Don't merge files we don't plan to keep cleanup_input=True, #cleanup_input=False, outputs=['HLT-00000'], dataset=ParentDataset( parent=digi, units_per_task=1 ), category=hlt_resources ) reco = Workflow( label='reco_step_{tag}'.format(tag=label_tag), command='cmsRun {cfg}'.format(cfg=wf_fragments['reco']), sandbox=cmssw.Sandbox(release=rel_map[UL_YEAR]['reco']), merge_size=-1, # Don't merge files we don't plan to keep cleanup_input=True, #cleanup_input=False, outputs=['RECO-00000.root'], dataset=ParentDataset( parent=hlt, units_per_task=2 ), category=reco_resources ) maod = Workflow( label='mAOD_step_{tag}'.format(tag=label_tag), command='cmsRun {cfg}'.format(cfg=wf_fragments['maod']), sandbox=cmssw.Sandbox(release=rel_map[UL_YEAR]['maod']), merge_size='256M', #merge_size=-1, cleanup_input=True, #cleanup_input=False, outputs=['MAOD-00000.root'], dataset=ParentDataset( parent=reco, units_per_task=3 ), category=maod_resources ) naod = Workflow( label='nAOD_step_{tag}'.format(tag=label_tag), command='cmsRun {cfg}'.format(cfg=wf_fragments['naod']), sandbox=cmssw.Sandbox(release=rel_map[UL_YEAR]['naod']), merge_size='256M', merge_command='python haddnano.py @outputfiles @inputfiles', extra_inputs=[os.path.join(PATH_TO_NAOD_CMSSW,'src/PhysicsTools/NanoAODTools/scripts/haddnano.py')], cleanup_input=False, # Leave the MAOD files outputs=['NAOD-00000.root'], dataset=ParentDataset( parent=maod, units_per_task=3 ), category=naod_resources ) # Specify which steps to run if (STEPS == 'throughGEN'): wf.extend([gen]) elif (STEPS == 'throughMAOD'): wf.extend([gen,sim,digi,hlt,reco,maod]) elif (STEPS == 'throughNAOD'): wf.extend([gen,sim,digi,hlt,reco,maod,naod]) else: print "\nUnknown steps" , STEPS , "exiting...\n" raise Exception config = Config( label=master_label, workdir=workdir_path, plotdir=plotdir_path, storage=storage, workflows=wf, advanced=AdvancedOptions( dashboard = False, bad_exit_codes=[127, 160], log_level=1, payload=10, xrootd_servers=['ndcms.crc.nd.edu', 'cmsxrootd.fnal.gov', 'deepthought.crc.nd.edu'] ) )