import datetime
import os

from lobster import cmssw
from lobster.core import AdvancedOptions, Category, Config, Dataset, StorageConfiguration, Workflow

# This lobster config runs over LHE tier EDM root files, produced using custom MadGraph gridpacks.
# It produces output trees similar to OSTwoLepAna.cc, but can run over LHE level EDM files. It stores
# only the eftwgts and the original xsec wgt as well as basic run,LS,etc. info.
# NOTE: as configured below, the inputs are the 'mAOD_*' sub-directories of the entries in dir_list
#   and cmsRun is invoked with 'datatier=MINIAODSIM'.

# Timestamp used to make the master label (and the 'local' test paths) unique per submission
timestamp_tag = datetime.datetime.now().strftime('%Y%m%d_%H%M')

#username = "awightma"
username = "kmohrman"

# Selects which set of output/workdir/plotdir paths is used below
#RUN_SETUP = 'local'
#RUN_SETUP = 'full_production'
RUN_SETUP = 'mg_studies'

input_version = ""
output_version = "v1"

#grp_tag = "2019_04_19/tllq4f_t-channelMatched_pythia-JetMax1_b2"
grp_tag = ""
#out_tag = "tllq4f_t-channelMatched_pythia-JetMax1_b2"
out_tag = "2019_08_14_addPtBranches/ttHJet-ttWJet_R5B1-HanV4Model-Comp_analysisEtaCut"
test_tag = "lobster_20180505_1440"  # If the input LHE files were also produced in 'local' running
prod_tag = "Round5/Batch1"

# Only run over gridpacks from specific processes/coeffs/runs (i.e. MG starting points).
# An empty whitelist disables the corresponding filter.
#process_whitelist = ["ttlnuJet","ttllNuNuJetNoHiggs","ttHJet"]
process_whitelist = ["ttlnuJet","ttHJet"]
coeff_whitelist = []
runs_whitelist = []

#master_label = 'EFT_T3_%s' % (timestamp_tag)
master_label = 'EFT_LHE_%s' % (timestamp_tag)

if RUN_SETUP == 'local':
    # For quick generic lobster workflow testing
    input_path = "/store/user/%s/tests/%s" % (username,test_tag)
    output_path = "/store/user/$USER/tests/lobster_%s" % (timestamp_tag)
    workdir_path = "/tmpscratch/users/$USER/tests/lobster_%s" % (timestamp_tag)
    plotdir_path = "~/www/lobster/tests/lobster_%s" % (timestamp_tag)
elif RUN_SETUP == 'mg_studies':
    # For MadGraph test studies
    input_path = "/store/user/%s/postLHE_step/%s/%s/" % (username,grp_tag,input_version)
    output_path = "/store/user/$USER/summaryTree_LHE/%s-mAOD/%s" % (out_tag,output_version)
    workdir_path = "/tmpscratch/users/$USER/summaryTree_LHE/%s-mAOD/%s" % (out_tag,output_version)
    plotdir_path = "~/www/lobster/summaryTree_LHE/%s-mAOD/%s" % (out_tag,output_version)
elif RUN_SETUP == 'full_production':
    input_path = "/store/user/{user}/FullProduction/{prod}/postLHE_step/{ver}/".format(user=username,prod=prod_tag,ver=input_version)
    output_path = "/store/user/$USER/summaryTree_LHE/FP/{prod}/{tag}-mAOD/{ver}".format(tag=out_tag,prod=prod_tag,ver=output_version)
    workdir_path = "/tmpscratch/users/$USER/summaryTree_LHE/FP/{prod}/{tag}-mAOD/{ver}".format(tag=out_tag,prod=prod_tag,ver=output_version)
    plotdir_path = "~/www/lobster/summaryTree_LHE/FP/{prod}/{tag}-mAOD/{ver}".format(tag=out_tag,prod=prod_tag,ver=output_version)
else:
    print("Unknown run setup, %s" % (RUN_SETUP))
    raise ValueError("Unknown run setup, %s" % (RUN_SETUP))

# NOTE: the per-setup input_path chosen above is overridden here. The inputs listed in dir_list
#   live under more than one user area, so the storage input root is the common '/store/user/'
#   prefix and each sample directory is expressed relative to it.
input_path = "/store/user/"
input_path_full = "/hadoop" + input_path

# Directories (on the local /hadoop mount) that are scanned for 'mAOD_*' sample sub-directories
dir_list = [
    os.path.join(input_path_full,"awightma/FullProduction/Round5/Batch1/postLHE_step/v1"),
    os.path.join(input_path_full,"kmohrman/postLHE_step/2019_04_19/ttXJetTests-HanV4Model-xqcut10qCut19/v3"),
    os.path.join(input_path_full,"kmohrman/postLHE_step/2019_04_19/ttHJet-ttWJet-HanV2ModelNOttgghCheck-xqcut10qCut19/v1"),
]

storage = StorageConfiguration(
    input=[
        "hdfs://eddie.crc.nd.edu:19000" + input_path,
        "root://deepthought.crc.nd.edu/" + input_path,  # Note the extra slash after the hostname!
        "gsiftp://T3_US_NotreDame" + input_path,
        "srm://T3_US_NotreDame" + input_path,
    ],
    output=[
        "hdfs://eddie.crc.nd.edu:19000" + output_path,
        "file:///hadoop" + output_path,
        # ND is not in the XrootD redirector, thus hardcode server.
        "root://deepthought.crc.nd.edu/" + output_path,  # Note the extra slash after the hostname!
        "gsiftp://T3_US_NotreDame" + output_path,
        "srm://T3_US_NotreDame" + output_path,
    ],
)

processing = Category(
    name='processing',
    cores=1,
    memory=1200,
    disk=1000
    #mode='fixed'
)

# Collect the sample directories to process, as paths relative to input_path_full.
# Entry names are split on '_'; field 0 must be 'mAOD' and fields 2,3,4 are used as the
# process, coefficient and run tags respectively.
maod_dirs = []
for path in dir_list:
    # Loop-invariant: the location of this directory relative to the storage input root
    relpath = os.path.relpath(path,input_path_full)
    for f in os.listdir(path):
        arr = f.split('_')
        if arr[0] != 'mAOD':
            continue
        full_path = os.path.join(path,f)
        # Check the entry itself (not its parent) so that stray files are ignored
        if not os.path.isdir(full_path):
            continue
        if not os.listdir(full_path):
            print("[WARNING] Skipping empty directory, %s" % (f))
            continue
        if len(arr) < 5:
            # Too few fields to extract the process/coeff/run tags
            print("[WARNING] Skipping directory with unexpected name, %s" % (f))
            continue
        p,c,r = arr[2],arr[3],arr[4]
        if process_whitelist and p not in process_whitelist:
            continue
        if coeff_whitelist and c not in coeff_whitelist:
            continue
        if runs_whitelist and r not in runs_whitelist:
            continue
        maod_dirs.append(os.path.join(relpath,f))

# Build one workflow per selected sample directory
wf = []

print("Generating workflows:")
for idx,maod_dir in enumerate(maod_dirs):
    # Only the final path component carries the process/coeff/run tags
    arr = os.path.basename(maod_dir).split('_')
    p,c,r = arr[2],arr[3],arr[4]

    cms_cmd = ['cmsRun','EFTLHEReader_cfg.py']
    cms_cmd.append('datatier=MINIAODSIM')

    print("\t[{n}/{tot}] mAOD Input: {dir}".format(n=idx+1,tot=len(maod_dirs),dir=maod_dir))
    print("\tCommand: {cmd}".format(cmd=' '.join(cms_cmd)))

    output = Workflow(
        label='output_{p}_{c}_{r}'.format(p=p,c=c,r=r),
        command=' '.join(cms_cmd),
        merge_size='1.0G',
        cleanup_input=False,
        dataset=Dataset(
            files=maod_dir,
            files_per_task=5,
            patterns=["*.root"]
        ),
        category=processing
    )
    wf.append(output)

config = Config(
    label=master_label,
    workdir=workdir_path,
    plotdir=plotdir_path,
    storage=storage,
    workflows=wf,
    advanced=AdvancedOptions(
        bad_exit_codes=[127, 160],
        log_level=1,
    )
)