scikit-hep / hist

Histogramming for analysis powered by boost-histogram
https://hist.readthedocs.io
BSD 3-Clause "New" or "Revised" License

Ambiguous error message in filling histogram #577

Open bockjoo opened 4 months ago

bockjoo commented 4 months ago

Hi, without knowing that the variables passed to a histogram fill have to be arrays of the same length, I passed a single number for the binning axis (x-axis) and an array as the weight, and I got this ambiguous error message:

File "/home/bockjoo/opt/cmsio2/cms/services/T2/ops/Work/AAA/vll-analysis.Coffea2024.6.1/lib/python3.12/site-packages/boost_histogram/_internal/hist.py", line 504, in fill
    self._hist.fill(*args_ars, weight=weight_ars, sample=sample_ars)  # type: ignore[arg-type]
    ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
ValueError: spans must have compatible lengths
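
For reference, here is my understanding of the mismatch, stripped down to plain hist without coffea/dask (a minimal sketch only; I have not checked whether this standalone version gives the exact same message):

import numpy as np
import hist

# One scalar for the axis and an array of weights, so the lengths do not match,
# which is, as far as I understand, what triggers the ValueError above.
h = hist.Hist(hist.axis.Regular(14, 0, 14, name="cut", label="Cutflow"))
h.fill(cut=0, weight=np.ones(10))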

I am using:

Python 3.12.4 | packaged by Anaconda, Inc. | (main, Jun 18 2024, 15:12:24) [GCC 11.2.0] on linux
Type "help", "copyright", "credits" or "license" for more information.
>>> import hist
>>> hist.__version__
'2.7.3'

This is a script that demonstrates the issue:

import os
import ssl
import hist
import dask
import awkward as ak
import hist.dask as hda
import dask_awkward as dak

from coffea import processor
from coffea.nanoevents.methods import candidate
from distributed import Client
from coffea.nanoevents import NanoEventsFactory, BaseSchema, NanoAODSchema
from coffea.nanoevents.methods import candidate, nanoaod, vector
from coffea.analysis_tools import PackedSelection, Weights

class VLLProcessor(processor.ProcessorABC):
    def __init__(self, isMC=True, era="2018", writeOutParquet=False):
        self.isMC = isMC
        ak.behavior.update(nanoaod.behavior)
        dataset_axis = hda.hist.hist.axis.StrCategory([], growth=True, name="dataset", label="Dataset")
        cutflow_axis = hda.hist.hist.axis.StrCategory([], growth=True, name="cutflow", label="Cutflow")
        cut = hda.hist.hist.axis.Regular(14, 0, 14, name="cut", label=r"Cutflow")
        self.make_output = lambda: {
            "CutFlow": hda.hist.Hist(dataset_axis, cut),
        }

    def process(self, events, shift_syst=None):
        dataset = events.metadata['dataset']
        output = self.make_output()
        selection = PackedSelection()
        selection.add(
            "0",
            events.Flag.goodVertices
            & events.Flag.globalSuperTightHalo2016Filter
            & events.Flag.HBHENoiseFilter
            & events.Flag.HBHENoiseIsoFilter
            & events.Flag.EcalDeadCellTriggerPrimitiveFilter
            & events.Flag.BadPFMuonFilter
            & (((not self.isMC) & events.Flag.eeBadScFilter) | self.isMC),
        )
        wgt = selection.all("0")
        bin = 0
        output["CutFlow"].fill(
            dataset=dataset,
            cut=bin,  # dak.ones_like(wgt)*bin,
            weight=wgt,
        )

        return {dataset: output}

    def postprocess(self, accumulator):
        pass

if __name__ == '__main__':
    filename = "root://cmsxrootd.hep.wisc.edu:1094//store/mc/RunIISummer20UL18NanoAODv9/TTTo2L2Nu_TuneCP5_13TeV-powheg-pythia8/NANOAODSIM/106X_upgrade2018_realistic_v16_L1v1-v1/130000/44187D37-0301-3942-A6F7-C723E9F4813D.root"

    events = NanoEventsFactory.from_root(
        {filename: "Events"},
        steps_per_file=2_000,
        metadata={"dataset": "TTTo2L2Nu_TuneCP5_13TeV-powheg-pythia8"},
        schemaclass=NanoAODSchema,
    ).events()
    p = VLLProcessor(isMC=True)
    out = p.process(events)

    (computed,) = dask.compute(out)
    print(computed)
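
For completeness, I believe the fill I was actually aiming for follows the commented-out hint in the script, i.e. broadcasting the cut value to an array with the same length as the weight (a sketch only; the point of this issue is the error message, not the workaround):

# inside VLLProcessor.process, instead of passing the scalar bin directly:
output["CutFlow"].fill(
    dataset=dataset,
    cut=dak.ones_like(wgt) * bin,  # array with the same length as wgt
    weight=wgt,
)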

Thanks, Bockjoo