Source code for Moore.persistence.truth_matching

###############################################################################
# (c) Copyright 2020-2023 CERN for the benefit of the LHCb Collaboration           #
#                                                                             #
# This software is distributed under the terms of the GNU General Public      #
# Licence version 3 (GPL Version 3), copied verbatim in the file "COPYING".   #
#                                                                             #
# In applying this licence, CERN does not waive the privileges and immunities #
# granted to it by virtue of its status as an Intergovernmental Organization  #
# or submit itself to any jurisdiction.                                       #
###############################################################################
"""Configuration for ProtoParticle-to-MCParticle association.

See the `truth_matching` method for more details on the API, and Moore#197
for general discussion on truth matching in Moore.
"""
import logging

try:
    from functools import cache
except ImportError:
    # Python 2 compatibility
    from cachetools import cached
    cache = lambda func: cached(cache={})(func)

from PyConf.Algorithms import (
    ChargedProtoParticleAssociator,
    MergeLinksByKeysToVector,
    MergeRelationsTablesPP2MCP,
    NeutralProtoParticleAssociator,
)
from PyConf.components import force_location
from PyConf.control_flow import CompositeNode, NodeLogic

from RecoConf.data_from_file import brunel_links, mc_unpacker
from RecoConf import mc_checking

from RecoConf.data_from_file import boole_links_digits_mcparticles
from PyConf.Algorithms import (
    CaloClusterMCTruth, CaloFutureDigit2MCLinks2Table,
    LHCb__Converters__Calo__Hypo2TruthTable__v1__fromV2)
from PyConf.Algorithms import CaloFutureHypoMCTruth

log = logging.getLogger(__name__)
#: TES location for charged ProtoParticle-to-MCParticle relations
CHARGED_PP2MC_LOC = "Relations/ChargedPP2MCP"
#: TES location for neutral ProtoParticle-to-MCParticle relations
NEUTRAL_PP2MC_LOC = "Relations/NeutralPP2MCP"
# Container type we match against to define 'a container of ProtoParticle objects'
PROTOPARTICLE_CONTAINER_T = "KeyedContainer<LHCb::ProtoParticle,Containers::KeyedObjectManager<Containers::hashmap> >"
# Container type we match against to define 'a container of Track objects'
TRACK_CONTAINER_T = "KeyedContainer<LHCb::Event::v1::Track,Containers::KeyedObjectManager<Containers::hashmap> >"
# Container type we match against to define 'a container of CaloCluster objects'
CALOCLUSTER_CONTAINER_T = "KeyedContainer<LHCb::CaloCluster,Containers::KeyedObjectManager<Containers::hashmap> >"

CALOCLUSTERS_V2_T = "LHCb::Event::Calo::v2::Clusters"
CALODIGITS_V2_T = "LHCb::Event::Calo::v2::Digits"
CALOHYPO_V2_T = "LHCb::Event::Calo::v2::Hypotheses"
CALOHYPO_V1_T = "KeyedContainer<LHCb::CaloHypo,Containers::KeyedObjectManager<Containers::hashmap> >"


@cache
def _collect_dependencies(root, type_name, check_outputs=False):
    """Return inputs to `root` with the given type name.

    The algorithm performs a depth-first search on the data dependency tree
    of `root`. The search stops traversing a branch once it finds an ancestor
    with the requested type. This reflects the LHCb v1 event model where, for
    example, tracks can be built from tracks and calorimeter hypos can be
    built from hypos; the goal of this function is to return the 'highest
    level' of the given type.
    """
    matches = set()
    # Depth-first search from the root
    to_list = lambda x: x if isinstance(x, list) else [x]

    # first check inputs
    for input_list in map(to_list, root.inputs.values()):
        for input_handle in input_list:
            if input_handle.type == type_name:
                matches.add(input_handle)
                continue
            matches.update(
                _collect_dependencies(input_handle.producer, type_name))

    # if not in inputs, it maybe in accompanying outputs
    if len(matches) == 0:
        # For tracks, looking specific for copier algorithm 'ChargedProtoParticleFilteredCopyAlg'
        # used in converting persistency locations versions
        for output_list in map(to_list, root.outputs.values()):
            for output_handle in output_list:
                if output_handle.type == type_name:
                    if type_name == TRACK_CONTAINER_T:
                        if 'Rec/Track' in output_handle.location:
                            matches.add(output_handle)
                    else:
                        matches.add(output_handle)

    return matches


def _find_protoparticles(candidates):
    """Return all ProtoParticle containers referenced by candidates.

    The data dependency tree for line candidates is traversed using
    `_collect_dependencies` to find ProtoParticle containers, and these are
    sorted into categories:

    1. Charged (they refer to a Track object)
    2. Neutral (they refer to a CaloHypo object)
    3. Charged from Brunel (they refer to a fixed PackedProto location)
    4. Neutral from Brunel (they refer to a fixed PackedProto location)

    Args:
        candidates (list of candidates)

    Returns:
        (charged, neutral), (charged_brunel, neutral_brunel):
            Each a set of DataHandle objects that match the respective
            category.
    """
    protos = {
        dh
        for output in candidates for dh in _collect_dependencies(
            output.producer, PROTOPARTICLE_CONTAINER_T)
    }
    charged = set()
    neutral = set()
    charged_brunel = set()
    neutral_brunel = set()

    # Heuristically determine whether a given container of ProtoParticles was
    # created from the raw event or was taken from the input file. We must
    # truth-match differently between the two cases.
    for dh in protos:
        alg = dh.producer
        prop = alg.inputs.get("InputName")
        # If there is a packed container location in the data dependency tree,
        # assume that these protos come from the input file (i.e. from Brunel)
        if prop is not None and (prop.location.startswith("/Event/pRec") or
                                 prop.location.startswith("/Event/HLT2/pRec")):

            # We happen to know that Brunel outputs charged and neutral protos
            # to so-named locations
            if "Neutral" in prop.location:
                neutral_brunel.add(dh)
            else:
                charged_brunel.add(dh)
        else:
            # Assume charged ProtoParticle container if the maker has CALO
            # clusters in its dependencies
            if _collect_dependencies(dh.producer, CALOCLUSTER_CONTAINER_T):
                neutral.add(dh)
            else:
                charged.add(dh)

    def dh_sorted(handles):
        return sorted(handles, key=hash)

    return (dh_sorted(charged),
            dh_sorted(neutral)), (dh_sorted(charged_brunel),
                                  dh_sorted(neutral_brunel))


def _match_charged(charged, charged_brunel, mc_particles):
    relations = []

    # Track association from file
    if charged_brunel:
        track_links_brunel = brunel_links("Tracks")
        assoc_tracks_brunel = MergeLinksByKeysToVector(
            InputLinksByKeys=[track_links_brunel])

    # Need these links for performing track association from raw
    links_to_lhcbids = mc_checking.make_links_lhcbids_mcparticles_tracking_and_muon_system(
    )

    # Track association from raw
    for dh in charged:
        tracks = _collect_dependencies(
            dh.producer, TRACK_CONTAINER_T, check_outputs=True)
        assert len(
            tracks
        ) == 1, "Unexpected number of track dependencies - expected 1, got " + str(
            len(tracks)) + " instead"
        tracks = dict(v1=tracks.pop())
        assoc_tracks = mc_checking.make_links_tracks_mcparticles(
            InputTracks=tracks,
            LinksToLHCbIDs=links_to_lhcbids,
        )
        assoc_tracks_merged = MergeLinksByKeysToVector(
            InputLinksByKeys=[assoc_tracks])
        assoc_charged = ChargedProtoParticleAssociator(
            InputProtoParticles=dh,
            InputMCParticles=mc_particles,
            InputAssociations=assoc_tracks_merged,
        )
        relations.append(assoc_charged.OutputTable)

    # Track association from file
    for dh in charged_brunel:
        assoc_charged = ChargedProtoParticleAssociator(
            InputProtoParticles=dh,
            InputMCParticles=mc_particles,
            InputAssociations=assoc_tracks_brunel,
        )
        relations.append(assoc_charged.OutputTable)

    return relations


def _match_neutral(neutral, neutral_brunel, mc_particles):
    relations = []

    # CaloHypo association from raw
    if neutral:
        # We only expect a single neutral container
        assert len(
            neutral
        ) == 1, "Unexpected multiple neutral ProtoParticle containers"

        neutral = neutral.pop()

        clus_v2 = _collect_dependencies(neutral.producer, CALOCLUSTERS_V2_T)
        assert len(
            clus_v2) == 2, "Unexpected number of clusters v2 dependencies"
        clusters_v2 = clus_v2.pop()
        splitClusters_v2 = clus_v2.pop()
        foundMergedPi0_in_splitClusters_v2 = False
        for conf in splitClusters_v2.producer.configuration():
            for subconf in conf:
                if type(subconf) == type(str()) and "MergedPi0" in subconf:
                    foundMergedPi0_in_splitClusters_v2 = True
        if not (foundMergedPi0_in_splitClusters_v2):
            clusters_v2, splitClusters_v2 = splitClusters_v2, clusters_v2

        dig_v2 = _collect_dependencies(neutral.producer, CALODIGITS_V2_T)
        ecalDigits_v2 = dig_v2.pop()

        hyp_v2 = _collect_dependencies(neutral.producer, CALOHYPO_V2_T)
        assert len(hyp_v2) == 2, "Unexpected number of hypotheses dependencies"
        photons_v2 = hyp_v2.pop()
        pi0s_v2 = hyp_v2.pop()
        if "MergedPi0" in photons_v2.location:
            photons_v2, pi0s_v2 = pi0s_v2, photons_v2

        clus_v1 = _collect_dependencies(neutral.producer,
                                        CALOCLUSTER_CONTAINER_T)
        assert len(
            clus_v1) == 2, "Unexpected number of clusters v1 dependencies"
        clusters_v1 = clus_v1.pop()
        splitClusters_v1 = clus_v1.pop()
        foundMergedPi0_in_splitClusters_v1 = False
        for conf in splitClusters_v1.producer.configuration():
            for subconf in conf:
                if type(subconf) == type(str()) and "MergedPi0" in subconf:
                    foundMergedPi0_in_splitClusters_v1 = True
        if not (foundMergedPi0_in_splitClusters_v1):
            clusters_v1, splitClusters_v1 = splitClusters_v1, clusters_v1

        hypos_v1 = _collect_dependencies(neutral.producer, CALOHYPO_V1_T)
        assert len(hypos_v1) == 3, "Unexpected number of hypos v1 dependencies"
        photons_v1 = hypos_v1.pop()
        pi0s_v1 = hypos_v1.pop()
        splitPhotons_v1 = hypos_v1.pop()
        if "SplitPhotons" in splitPhotons_v1.location and "MergedPi0" in photons_v1.location:
            photons_v1, pi0s_v1, splitPhotons_v1 = pi0s_v1, photons_v1, splitPhotons_v1
        if "SplitPhotons" in pi0s_v1.location and "MergedPi0" in splitPhotons_v1.location:
            photons_v1, pi0s_v1, splitPhotons_v1 = photons_v1, splitPhotons_v1, pi0s_v1
        if "SplitPhotons" in pi0s_v1.location and "MergedPi0" in photons_v1.location:
            photons_v1, pi0s_v1, splitPhotons_v1 = splitPhotons_v1, photons_v1, pi0s_v1
        if "SplitPhotons" in photons_v1.location and "MergedPi0" in splitPhotons_v1.location:
            photons_v1, pi0s_v1, splitPhotons_v1 = pi0s_v1, splitPhotons_v1, photons_v1
        if "SplitPhotons" in photons_v1.location and "MergedPi0" in pi0s_v1.location:
            photons_v1, pi0s_v1, splitPhotons_v1 = splitPhotons_v1, pi0s_v1, photons_v1

        # get Digit2MC table
        tableMCCaloDigits = CaloFutureDigit2MCLinks2Table(
            CaloDigits=ecalDigits_v2,
            MCParticles=mc_particles,
            Link=boole_links_digits_mcparticles("EcalDigitsV1"),
        ).Output

        # get Clusterv22MC table
        tableMCCaloClusters = CaloClusterMCTruth(
            InputRelations=tableMCCaloDigits,
            Input=boole_links_digits_mcparticles('EcalDigits'),
            MCParticleLocation=mc_particles,
            Clusters=clusters_v2).Output

        tableMCPhotons = CaloFutureHypoMCTruth(
            InputHypos=photons_v2,
            InputClusterTable=tableMCCaloClusters,
        )

        tableMCPi0s = CaloFutureHypoMCTruth(
            InputHypos=pi0s_v2,
            InputClusterTable=tableMCCaloClusters,
        )

        # FIXME: work around to get objects that are not explicit dependencies of NeutralProtoParticles
        photons_v1_fromv2_converter = photons_v1.producer
        photons_v1_fromv2_OutputTable = photons_v1_fromv2_converter.OutputTable

        pi0s_v1_fromv2_converter = pi0s_v1.producer
        pi0s_v1_fromv2_OutputTable = pi0s_v1_fromv2_converter.OutputTable

        cvrtPhotons = LHCb__Converters__Calo__Hypo2TruthTable__v1__fromV2(
            InputHypos=photons_v1,
            Old2New=photons_v1_fromv2_OutputTable,
            InputTable=tableMCPhotons.OutputTable,
        )

        cvrtPi0s = LHCb__Converters__Calo__Hypo2TruthTable__v1__fromV2(
            InputHypos=pi0s_v1,
            Old2New=pi0s_v1_fromv2_OutputTable,
            InputTable=tableMCPi0s.OutputTable,
        )
        listInput = [cvrtPhotons.OutputLinks, cvrtPi0s.OutputLinks]

        assoc_hypos = MergeLinksByKeysToVector(InputLinksByKeys=listInput)

        assoc_neutral = NeutralProtoParticleAssociator(
            InputProtoParticles=neutral,
            InputMCParticles=mc_particles,
            InputAssociations=assoc_hypos,
        )
        relations.append(assoc_neutral.OutputTable)

    # CaloHypo association from file
    if neutral_brunel:
        # We only expect a single neutral container from Brunel
        assert len(
            neutral_brunel
        ) == 1, "Unexpected multiple neutral ProtoParticle containers"
        neutral_brunel = neutral_brunel.pop()
        hypo_types_brunel = [
            "CaloMergedPi0s", "CaloPhotons", "CaloSplitPhotons"
        ]
        hypo_links_brunel = [brunel_links(t) for t in hypo_types_brunel]
        assoc_hypos_brunel = MergeLinksByKeysToVector(
            InputLinksByKeys=hypo_links_brunel)
        assoc_neutral_brunel = NeutralProtoParticleAssociator(
            InputProtoParticles=neutral_brunel,
            InputMCParticles=mc_particles,
            InputAssociations=assoc_hypos_brunel,
        )
        relations.append(assoc_neutral_brunel.OutputTable)

    return relations


[docs]def truth_match_candidates(candidates): """Return relations tables for the objects produced by the candidates. Associates ProtoParticle objects selected by lines to MCParticle objects. If the line outputs refer to the reconstruction from the file (produced by Brunel), the associations are created using the LinksByKey objects from the file (produced by Brunel). If the line outputs refer to the Moore reconstruction, the associations are created from the ground up using the lower-level associators already in Moore. However the relations are made, they are first made per ProtoParticle container. In order to provide a fixed number of output tables in well-defined locations, the relations tables we make in Moore are merged into two tables: one for charged protos, and one for neutral. Those locations are returned by this method. Args: candidates (list of candidates) Returns: matching_cf (CompositeNode): Algorithms needed to run the matching. outputs (list of str): ProtoParticle-to-MCParticle relations tables. Note: Associating neutral objects created from the Moore reconstruction is currently not supported. This method will raise an `AssertionError` if it finds line outputs that refer to neutral Moore reconstruction. """ (charged, neutral), (charged_brunel, neutral_brunel) = _find_protoparticles(candidates) mc_particles = mc_unpacker("MCParticles") mc_vertices = mc_unpacker("MCVertices") # TODO factor out configuration common to RecoConf.mc_checking and use that # rather than duplicate it here charged_relations = _match_charged(charged, charged_brunel, mc_particles) neutral_relations = _match_neutral(neutral, neutral_brunel, mc_particles) # These algorithms produce the relations tables we 'export', so we force # their locations to a fixed location charged_merged = MergeRelationsTablesPP2MCP( InputRelationsTables=charged_relations, outputs=dict(Output=force_location(CHARGED_PP2MC_LOC)), ) neutral_merged = MergeRelationsTablesPP2MCP( InputRelationsTables=neutral_relations, outputs=dict(Output=force_location(NEUTRAL_PP2MC_LOC)), ) protoparticle_relations = [charged_merged.Output, neutral_merged.Output] # Have to run the MC unpackers by hand for now; if the unpackers become # functional we can remove the need for this CF node ## TODO: instead of using an explicit control flow node, add explicit # 'hand-configured' data dependencies... matching_cf = CompositeNode( "truth_matching", children=[mc_particles, mc_vertices, charged_merged, neutral_merged], combine_logic=NodeLogic.NONLAZY_OR, force_order=True, ) return matching_cf, protoparticle_relations
[docs]def truth_match_lines(lines): """Return relations tables for the objects produced by the lines. Line outputs (candidates) are collected and passed to :func:`truth_match_candidates`. See there for more details on the persistence. """ candidates = {cand for line in lines for cand in line.objects_to_persist} return truth_match_candidates(candidates)