import datetime
import os
import subprocess

from EFTGenReader.GenReader.DatasetHelper import DatasetHelper

from lobster import cmssw
from lobster.core import AdvancedOptions, Category, Config, Dataset, StorageConfiguration, Workflow

# NOTE1: The cmsRun config likely doesn't correctly handle input samples which do not have any EFT reweighting
# NOTE2: The input samples must be in MAOD format

GIT_REPO_DIR = subprocess.check_output(['git','rev-parse','--show-toplevel']).strip()
timestamp_tag = datetime.datetime.now().strftime('%Y%m%d_%H%M')

# NOTE: All samples must be located somewhere under the directory specified by input_path
input_path = "/store/user/awightma/"

out_ver = "v1"
tag = 'NoTopLeptons-NoCuts'

master_label = 'EFT_LHE_{tstamp}'.format(tstamp=timestamp_tag)

RUN_MODE = 'testing'

#output_path = "/store/user/$USER/KinematicGenHists/{tag}/{ver}".format(tag=tag,ver=out_ver)
if RUN_MODE == 'testing':
    output_path  = "/store/user/$USER/tests/lobster_{tstamp}".format(tstamp=timestamp_tag)
    workdir_path = "/tmpscratch/users/$USER/tests/lobster_{tstamp}".format(tstamp=timestamp_tag)
    plotdir_path = "~/www/lobster/tests/lobster_{tstamp}".format(tstamp=timestamp_tag)
elif RUN_MODE == 'mg_studies':
    output_path  = "/store/user/$USER/KinematicGenHists/{tag}/{ver}".format(tag=tag,ver=out_ver)
    workdir_path = "/tmpscratch/users/$USER/KinematicGenHists/{tag}/{ver}".format(tag=tag,ver=out_ver)
    plotdir_path = "~/www/lobster/KinematicGenHists/{tag}/{ver}".format(tag=tag,ver=out_ver)

storage = StorageConfiguration(
    input=[
        "hdfs://eddie.crc.nd.edu:19000"  + input_path,
        "root://deepthought.crc.nd.edu/" + input_path,  # Note the extra slash after the hostname!
        "gsiftp://T3_US_NotreDame"       + input_path,
        "srm://T3_US_NotreDame"          + input_path,
    ],
    output=[
        "hdfs://eddie.crc.nd.edu:19000"  + output_path,
        "file:///hadoop"                 + output_path,
        # ND is not in the XrootD redirector, thus hardcode server.
        "root://deepthought.crc.nd.edu/" + output_path, # Note the extra slash after the hostname!
        "gsiftp://T3_US_NotreDame"       + output_path,
        "srm://T3_US_NotreDame"          + output_path,
    ],
)

processing = Category(
    name='processing',
    cores=1,
    memory=1200,
    disk=1000
    #mode='fixed'
)

wf = []

ds_helper = DatasetHelper()
ds_helper.load(os.path.join(GIT_REPO_DIR,"GenReader/data/JSON/datasets.json"))

width = 1

#samples = [
    #'tllq4f_SMNoSchanW'
#]

samples = [
    'ttHJet_HanModelxqcut25-qCut30',
    'ttHJet_HanModelxqcut25-qCut45',
    'ttHJet_HanModelxqcut35-qCut45',
    'ttHJet_HanModelxqcut45-qCut45',
    'ttHJet_HanModelxqcut10-qCut19',
    'ttHJet_HanModelxqcut10-qCut30',
    'ttHJet_HanModelxqcut10-qCut45'
]


for idx,sample_name in enumerate(samples):
    if not ds_helper.exists(sample_name):
        print "[{0:0>{w}}/{1:0>{w}}] Skipping unknown sample: {sample}".format(idx+1,len(samples),sample=sample_name,w=width)
        continue
    sample_loc = ds_helper.getData(sample_name,'loc')
    full_path = sample_loc.split("/hadoop")[1]
    rel_path = os.path.relpath(full_path,input_path)

    print "[{0:0>{w}}/{1:0>{w}}] Sample: {sample}".format(idx+1,len(samples),sample=sample_name,w=width)
    print "\tFullPath:  {path}".format(path=full_path)
    print "\tInputPath: {path}".format(path=input_path)
    print "\tRelPath:   {path}".format(path=rel_path)

    cms_cmd = ['cmsRun','lobsterized_EFTGenReader_cfg.py']
    if not ds_helper.getData(sample_name,'is_eft'):
        cms_cmd.extend(['iseft=False'])

    # The workflow label can't have any dashes (-) in it, so remove them
    safe_label_name = sample_name.replace('-','')
    output = Workflow(
        label='output_{label}'.format(label=safe_label_name),
        #command='cmsRun lobsterized_EFTGenReader_cfg.py',
        command=' '.join(cms_cmd),
        cleanup_input=False,
        outputs=['output_tree.root'],
        merge_size='100.0G',  # This is set to a large value to make sure the final output is merged into a single file
        dataset=Dataset(
            files=rel_path,
            files_per_task=1,
            patterns=["*.root"]
        ),
        merge_command='hadd @outputfiles @inputfiles',
        category=processing
    )

    wf.extend([output])

config = Config(
    label=master_label,
    workdir=workdir_path,
    plotdir=plotdir_path,
    storage=storage,
    workflows=wf,
    advanced=AdvancedOptions(
        bad_exit_codes=[127, 160],
        log_level=1,
    )
)