import datetime
import os

from lobster import cmssw
from lobster.core import AdvancedOptions, Category, Config, Dataset, StorageConfiguration, Workflow

# This lobster config runs over LHE tier EDM root files, produced using custom MadGraph gridpacks.
# It produces output trees similar to OSTwoLepAna.cc, but can run over LHE level EDM files. It stores
# only the eftwgts and the original xsec wgt as well as basic run,LS,etc. info.

# Minute-resolution timestamp used to make labels and test directories unique.
timestamp_tag = datetime.datetime.now().strftime('%Y%m%d_%H%M')

#username = "awightma"
username = "kmohrman"

# Selects which set of output/work/plot paths is built below.
#RUN_SETUP = 'local'
#RUN_SETUP = 'full_production'
RUN_SETUP = 'mg_studies'

input_version = "v1"
output_version = "v1"

#grp_tag = "2019_04_19/ttHJet-xqcutStudies-xqcut10qCutTests"
grp_tag = ""  #"2019_04_19/ttX-ttXJet-HanV4Model-0Jetvs1JetTests"
#out_tag = "2019_08_14_addPtBranches/ttXJet_HanV4ttXJetStartPtChecks-xqcut10qCutTests_analysisEtaCut"
out_tag = "2019_08_14_addPtBranches/tllq4fNoSchanWNoHiggs0p_HanV4tZqStartPtChecks-allRunsMatchOff_analysisEtaCut"
test_tag = "lobster_20180505_1440"  # If the input LHE files were also produced in 'local' running
prod_tag = "Round1/Batch1"

# Only run over gridpacks from specific processes/coeffs/runs (i.e. MG starting points).
# An empty list disables the corresponding filter.
process_whitelist = []
coeff_whitelist = []
runs_whitelist = []

#master_label = 'EFT_T3_%s' % (timestamp_tag)
master_label = 'EFT_LHE_%s' % (timestamp_tag)

if RUN_SETUP == 'local':
    # For quick generic lobster workflow testing
    input_path = "/store/user/%s/tests/%s" % (username, test_tag)
    output_path = "/store/user/$USER/tests/lobster_%s" % (timestamp_tag)
    workdir_path = "/tmpscratch/users/$USER/tests/lobster_%s" % (timestamp_tag)
    plotdir_path = "~/www/lobster/tests/lobster_%s" % (timestamp_tag)
elif RUN_SETUP == 'mg_studies':
    # For MadGraph test studies
    input_path = "/store/user/%s/genOnly_step/%s/%s/" % (username, grp_tag, input_version)
    output_path = "/store/user/$USER/summaryTree_LHE/%s-GEN/%s" % (out_tag, output_version)
    workdir_path = "/tmpscratch/users/$USER/summaryTree_LHE/%s-GEN/%s" % (out_tag, output_version)
    plotdir_path = "~/www/lobster/summaryTree_LHE/%s-GEN/%s" % (out_tag, output_version)
elif RUN_SETUP == 'full_production':
    input_path = "/store/user/%s/FullProduction/%s/genOnly_step/%s/" % (username, prod_tag, input_version)
    output_path = "/store/user/$USER/summaryTree_LHE/FP/%s-GEN/%s" % (prod_tag, output_version)
    workdir_path = "/tmpscratch/users/$USER/summaryTree_LHE/FP/%s-GEN/%s" % (prod_tag, output_version)
    plotdir_path = "~/www/lobster/summaryTree_LHE/FP/%s-GEN/%s" % (prod_tag, output_version)
else:
    # Keep the console message and also attach it to the exception so the
    # traceback is self-explanatory.
    setup_error = "Unknown run setup, %s" % (RUN_SETUP)
    print(setup_error)
    raise ValueError(setup_error)

# NOTE: this overrides the per-setup input_path chosen above. The inputs are
# instead listed explicitly in dir_list, and the paths handed to each Dataset
# are made relative to input_path_full.
input_path = "/store/user/"
input_path_full = "/hadoop" + input_path

# Directories that are scanned for 'gen_*' sub-directories.
dir_list = [
    #os.path.join(input_path_full,"kmohrman/genOnly_step/2019_04_19/ttX-ttXJet-HanV4Model-0Jetvs1JetTests/v1"),
    #os.path.join(input_path_full,"kmohrman/genOnly_step/2019_04_19/ttHJet-ttWJet_HanV4ttXJetStartPtChecks-xqcut10qCut19/v1"),
    #os.path.join(input_path_full,"kmohrman/genOnly_step/2019_04_19/ttHJet-ttZJet_HanV4ttXJetStartPtChecks-xqcut10qCut19/v1"),
    #os.path.join(input_path_full,"kmohrman/genOnly_step/2019_04_19/ttXJet_HanV4ttXJetStartPtChecks-xqcut10qCutTests/v1"),
    #os.path.join(input_path_full,"kmohrman/genOnly_step/2019_04_19/tHq4f_HanV4tHqStartPtChecks-allRuns/v1"),
    os.path.join(input_path_full, "kmohrman/genOnly_step/2019_04_19/tllq4fNoSchanWNoHiggs0p_HanV4tHqStartPtChecks-allRuns/v1"),
]

storage = StorageConfiguration(
    input=[
        "hdfs://eddie.crc.nd.edu:19000" + input_path,
        "root://deepthought.crc.nd.edu/" + input_path,  # Note the extra slash after the hostname!
        "gsiftp://T3_US_NotreDame" + input_path,
        "srm://T3_US_NotreDame" + input_path,
    ],
    output=[
        "hdfs://eddie.crc.nd.edu:19000" + output_path,
        "file:///hadoop" + output_path,
        # ND is not in the XrootD redirector, thus hardcode server.
        "root://deepthought.crc.nd.edu/" + output_path,  # Note the extra slash after the hostname!
        "gsiftp://T3_US_NotreDame" + output_path,
        "srm://T3_US_NotreDame" + output_path,
    ],
)

processing = Category(
    name='processing',
    cores=1,
    memory=1200,
    disk=1000
    #mode='fixed'
)

# Collect every usable 'gen_*' directory as a path relative to input_path_full.
# Directory names are split on '_'; tokens 2, 3 and 4 are used as the
# process, coefficient and run tags.
gen_dirs = []
for path in dir_list:
    # Loop-invariant: the scanned directory relative to the storage root.
    rel_root = os.path.relpath(path, input_path_full)
    for f in os.listdir(path):
        # Inspect the entry itself, not the directory being scanned; checking
        # `path` here would make the directory/emptiness tests always pass.
        dir_path = os.path.join(path, f)
        if not os.path.isdir(dir_path):
            continue
        arr = f.split('_')
        if arr[0] != 'gen':
            continue
        if len(arr) < 5:
            # Too few tokens to extract the process/coeff/run tags.
            print("[WARNING] Skipping directory with unexpected name, %s" % (f))
            continue
        if not os.listdir(dir_path):
            print("[WARNING] Skipping empty directory, %s" % (f))
            continue
        p, c, r = arr[2], arr[3], arr[4]
        if process_whitelist and p not in process_whitelist:
            continue
        if coeff_whitelist and c not in coeff_whitelist:
            continue
        if runs_whitelist and r not in runs_whitelist:
            continue
        gen_dirs.append(os.path.join(rel_root, f))

# Build one workflow per selected gen directory.
wf = []
print("Generating workflows:")
for idx, gen_dir in enumerate(gen_dirs):
    # Every entry passed the >= 5 token check above, so indexing is safe.
    arr = os.path.basename(gen_dir).split('_')
    p, c, r = arr[2], arr[3], arr[4]
    print("\t[{n}/{tot}] GEN Input: {dir}".format(n=idx+1, tot=len(gen_dirs), dir=gen_dir))
    output = Workflow(
        label='output_{p}_{c}_{r}'.format(p=p, c=c, r=r),
        command='cmsRun EFTLHEReader_cfg.py',
        merge_size='1.0G',
        cleanup_input=False,
        dataset=Dataset(
            # Path relative to input_path_full; presumably resolved by lobster
            # against the storage input locations -- confirm against lobster docs.
            files=gen_dir,
            files_per_task=5,  # Remember that the GEN step already does 5-10 files per task
            patterns=["*.root"]
        ),
        category=processing
    )
    wf.append(output)

config = Config(
    label=master_label,
    workdir=workdir_path,
    plotdir=plotdir_path,
    storage=storage,
    workflows=wf,
    advanced=AdvancedOptions(
        bad_exit_codes=[127, 160],
        log_level=1,
    )
)