"""Lobster configuration for the post-LHE GEN and SIM steps.

For every LHE input directory listed in ``lhe_dirs`` this script builds a
``gen`` workflow that reads the LHE ROOT files and a ``sim`` workflow that
consumes the ``gen`` output, then bundles all workflows into the module-level
``config`` object.

All ``print`` calls pass exactly one pre-formatted string so that the output
is identical whether ``print`` is a statement (Python 2) or a function.
"""
import datetime
import os
import sys
import shutil

from lobster import cmssw
from lobster.core import (
    AdvancedOptions,
    Category,
    Config,
    Dataset,
    ParentDataset,
    StorageConfiguration,
    Workflow,
)

# The local 'helpers' package is resolved relative to the directory the
# script is launched from, so that directory has to be on sys.path first.
sys.path.append(os.getcwd())
from helpers.utils import regex_match, run_process

timestamp_tag = datetime.datetime.now().strftime('%Y%m%d_%H%M')

input_path = "/store/user/"
input_path_full = "/hadoop" + input_path

RUN_SETUP = 'testing'

out_ver = "v1"   # The version index for the OUTPUT directory
out_tag = "2019_04_19/TEST"

master_label = 'EFT_ALL_postLHE_{tstamp}'.format(tstamp=timestamp_tag)

# NOTE(review): the paths below contain a literal "$USER"; presumably it is
# expanded downstream by Lobster -- confirm.
if RUN_SETUP == 'testing':
    grp_tag = "lobster_{tstamp}".format(tstamp=timestamp_tag)
    output_path = "/store/user/$USER/postLHE_step/tests/{tag}/{ver}".format(tag=grp_tag, ver=out_ver)
    workdir_path = "/tmpscratch/users/$USER/postLHE_step/tests/{tag}/{ver}".format(tag=grp_tag, ver=out_ver)
    plotdir_path = "~/www/lobster/postLHE_step/tests/{tag}/{ver}".format(tag=grp_tag, ver=out_ver)
else:
    # Carry the explanation in the exception itself instead of printing it
    # separately and raising a message-less ValueError.
    raise ValueError("Unknown run setup, {setup}".format(setup=RUN_SETUP))

storage = StorageConfiguration(
    input=[
        "hdfs://eddie.crc.nd.edu:19000" + input_path,
        "root://deepthought.crc.nd.edu/" + input_path,  # Note the extra slash after the hostname!
        "gsiftp://T3_US_NotreDame" + input_path,
        "srm://T3_US_NotreDame" + input_path,
    ],
    output=[
        "hdfs://eddie.crc.nd.edu:19000" + output_path,
        # ND is not in the XrootD redirector, thus hardcode server.
        "root://deepthought.crc.nd.edu/" + output_path,  # Note the extra slash after the hostname!
        "gsiftp://T3_US_NotreDame" + output_path,
        "srm://T3_US_NotreDame" + output_path,
        "file:///hadoop" + output_path,
    ],
    disable_input_streaming=False,
)

# LHE input directories, given relative to input_path.
lhe_dirs = [
    "kmohrman/LHE_step/FullR2Studies/PreliminaryStudies/tHq4f_testOldGenprod-HanV4/v1/lhe_step_tHq4f_testOldGenprodHanV4_run2"
]

#################################################################
# Worker Res.:
#   Cores:  12    | 4
#   Memory: 16000 | 8000
#   Disk:   13000 | 6500
#################################################################
# Need to be careful with using 'runtime' setting, as it can cause us to
# exceed the workers resources
gen_resources = Category(
    name='gen',
    cores=1,
    memory=1200,
    disk=1000,
    tasks_min=12,
    tasks_max=3000,
    mode='fixed'
)

sim_resources = Category(
    name='sim',
    cores=6,
    memory=3000,
    disk=3000,
    tasks_min=12,
    mode='fixed'
)
#################################################################


def _split_lhe_dir_name(lhe_dir):
    """Return the three name fields used to label the workflows.

    The last path component of *lhe_dir* is split on ``'_'`` and the fields
    at positions 2, 3 and 4 are returned, e.g.
    ``lhe_step_tHq4f_testOldGenprodHanV4_run2`` gives
    ``('tHq4f', 'testOldGenprodHanV4', 'run2')``.

    Raises:
        ValueError: if the directory name has fewer than five
            ``'_'``-separated fields.
    """
    dir_name = os.path.basename(lhe_dir)
    fields = dir_name.split('_')
    if len(fields) < 5:
        raise ValueError(
            "Cannot build a workflow label from LHE directory name "
            "'{name}': expected at least 5 '_'-separated fields, "
            "got {n}".format(name=dir_name, n=len(fields))
        )
    return fields[2], fields[3], fields[4]


wf = []

print("Generating workflows:")
for idx, lhe_dir in enumerate(lhe_dirs, 1):
    print("\t[{0}/{1}] LHE Input: {dir}".format(idx, len(lhe_dirs), dir=lhe_dir))
    # NOTE(review): p/c/r presumably stand for process, coefficient tag and
    # run tag -- confirm against the LHE-step naming convention.
    p, c, r = _split_lhe_dir_name(lhe_dir)
    print("p c r: {0} {1} {2}".format(p, c, r))

    label_tag = "{p}_{c}_{r}".format(p=p, c=c, r=r)

    gen = Workflow(
        label='gen_step_{tag}'.format(tag=label_tag),
        command='cmsRun {cfg}'.format(cfg='python_cfgs/GEN/GEN-00000-tllq4f_1_cfg.py'),
        sandbox=cmssw.Sandbox(release='CMSSW_9_3_6'),
        merge_size=-1,  # Don't merge files we don't plan to keep
        cleanup_input=False,
        globaltag=False,
        outputs=['GEN-00000.root'],
        dataset=Dataset(
            files=lhe_dir,
            files_per_task=1,
            patterns=["*.root"]
        ),
        category=gen_resources
    )

    # The sim step takes the gen workflow above as its parent dataset.
    sim = Workflow(
        label='sim_step_{tag}'.format(tag=label_tag),
        command='cmsRun {cfg}'.format(cfg='TRK-RunIISummer19UL16SIM-00003_1_cfg.py'),
        sandbox=cmssw.Sandbox(release='CMSSW_10_6_12'),
        merge_size=-1,  # Don't merge files we don't plan to keep
        cleanup_input=False,
        globaltag=False,
        outputs=['SIM-00000.root'],
        dataset=ParentDataset(
            parent=gen,
            units_per_task=1
        ),
        category=sim_resources
    )

    wf.extend([gen, sim])

config = Config(
    label=master_label,
    workdir=workdir_path,
    plotdir=plotdir_path,
    storage=storage,
    workflows=wf,
    advanced=AdvancedOptions(
        dashboard=False,
        bad_exit_codes=[127, 160],
        log_level=1,
        payload=10,
        xrootd_servers=['ndcms.crc.nd.edu',
                        'cmsxrootd.fnal.gov',
                        'deepthought.crc.nd.edu']
    )
)