"""Count raw and generated events over the ROOT files listed in a sample JSON.

The script reads the ``files`` list from ``json_path``, opens each file with
uproot, and accumulates the per-file event counts returned by ``get_info``.
"""

import json

import numpy as np
import uproot


def get_info(fname, tree_name="Events", error_log="corrupt_files.txt"):
    """Return the raw and generated event counts of one ROOT file.

    Args:
        fname: Path (or URL) of the ROOT file, passed to ``uproot.open``.
        tree_name: Name of the event tree inside the file.
        error_log: Text file to which ``fname`` is appended when the file
            cannot be processed.

    Returns:
        ``[raw_events, gen_events]`` on success, where ``raw_events`` is the
        number of entries reported by the tree and ``gen_events`` is the sum
        of the ``genEventCount`` (or ``genEventCount_``) branch of the
        ``Runs`` tree when that tree exists, otherwise ``raw_events``.
        ``None`` if anything goes wrong while reading the file; in that case
        the error is printed and ``fname`` is appended to ``error_log``.
    """
    print(f"Opening with uproot: {fname}")

    try:
        with uproot.open(fname) as f:
            if tree_name not in f:
                raise KeyError(f"Tree '{tree_name}' not found in {fname}")

            tree = f[tree_name]
            # Files without a "genWeight" branch are treated as data.
            is_data = "genWeight" not in tree

            # The raw number of entries as reported by the tree.
            raw_events = int(tree.num_entries)
            gen_events = raw_events

            # NOTE: sow_events (sum of weights) is computed for parity with
            # the historical implementation but is not part of the return
            # value.
            if is_data:
                # Data doesn't have gen or weighted events.
                sow_events = raw_events
            else:
                # NOTE(review): this sums over the branch object itself, not
                # over ``.array()`` -- confirm it yields the event weights
                # before relying on sow_events.
                sow_events = sum(tree["genWeight"])
                if "Runs" in f:
                    # Prefer the bookkeeping stored in the "Runs" tree.
                    runs = f["Runs"]
                    gen_key = "genEventCount" if "genEventCount" in runs else "genEventCount_"
                    sow_key = "genEventSumw" if "genEventSumw" in runs else "genEventSumw_"
                    gen_events = sum(runs[gen_key].array())
                    sow_events = sum(runs[sow_key].array())

    except Exception as e:
        # Best effort: report the problem, remember the file, and let the
        # caller carry on with the remaining files.
        print(f"Error processing {fname}: {e}")
        with open(error_log, "a") as log_file:
            log_file.write(f"{fname}\n")
        return None

    return [raw_events, gen_events]


# Sample JSON to process. Exactly one assignment should be active; the other
# lines are kept as ready-to-use alternatives.
#json_path = "../../input_samples/sample_jsons/signal_samples/private_UL/UL17_ttgamma_dilept_NDSkim.json"
#json_path = "../../input_samples/sample_jsons/signal_samples/private_UL/UL18_ttgamma_dilept_v2_NDSkim.json"
#json_path = "../../input_samples/sample_jsons/signal_samples/private_UL/UL16APV_ttgamma_dilept_NDSkim.json"
#json_path = "../../input_samples/sample_jsons/signal_samples/private_UL/UL16APV_ttgamma_dilept.json"
#json_path = "../../input_samples/sample_jsons/signal_samples/private_UL/UL18_ttgamma_dilept_b1.json"
json_path = "../../input_samples/sample_jsons/signal_samples/private_UL/UL16_ttgamma_dilept_b1.json"

# Prefix prepended to every file path taken from the JSON.
DATA_PREFIX = "/cms/cephfs/data/"

total_nEvents = 0
total_genEvents = 0
total_sow_events = 0

with open(json_path, "r") as f:
    data = json.load(f)

# Every file up to AND including the first one whose path contains this
# marker is skipped. Set it to "" or None to process all files.
skip_until = "441860"
skip = bool(skip_until)

if "files" in data:
    root_files = data["files"]
    for file_path in root_files:
        if skip:
            if skip_until in file_path:
                print(file_path)
                skip = False
            continue

        file_path = DATA_PREFIX + file_path
        list_to_output = get_info(file_path)
        if list_to_output is None:
            # get_info already reported and logged the unreadable file;
            # leave it out of the totals instead of crashing on None[0].
            continue

        total_nEvents += list_to_output[0]
        total_genEvents += list_to_output[1]
        # Print the running totals after every file so partial sums are
        # still available if the run is interrupted.
        print(total_nEvents, total_genEvents)