File DataSource.h
-
namespace podio
Functions
-
ROOT::RDataFrame CreateDataFrame(const std::vector<std::string> &filePathList, const std::vector<std::string> &collsToRead = {})
Create RDataFrame from multiple Podio files.
- Parameters:
filePathList – [in] List of file paths from which the RDataFrame will be created.
collsToRead – [in] List of collection names that should be made available.
- Returns:
RDataFrame created from input file list.
-
ROOT::RDataFrame CreateDataFrame(const std::string &filePath, const std::vector<std::string> &collsToRead = {})
Create RDataFrame from a Podio file or glob pattern matching multiple Podio files.
- Parameters:
filePath – [in] File path from which the RDataFrame will be created. The file path can include glob patterns to match multiple files.
collsToRead – [in] List of collection names that should be made available.
- Returns:
RDataFrame created from the input file path or glob pattern.
-
class DataSource : public ROOT::RDF::RDataSource
- #include <podio/DataSource.h>
Public Functions
-
explicit DataSource(const std::string &filePath, int nEvents = -1, const std::vector<std::string> &collsToRead = {})
Construct the podio::DataSource from the provided file.
- Parameters:
filePath – Path to the file that should be read
nEvents – Number of events to process (optional, defaults to -1 for all events)
collsToRead – The collections that should be made available (optional, defaults to empty vector for all collections)
-
explicit DataSource(const std::vector<std::string> &filePathList, int nEvents = -1, const std::vector<std::string> &collsToRead = {})
Construct the podio::DataSource from the provided file list.
- Parameters:
filePathList – Paths to the files that should be read
nEvents – Number of events to process (optional, defaults to -1 for all events)
collsToRead – The collections that should be made available (optional, defaults to empty vector for all collections)
-
void SetNSlots(unsigned int nSlots) override
Inform the podio::DataSource of the desired level of parallelism.
-
void Initialize() override
Inform podio::DataSource that an event-loop is about to start.
-
std::vector<std::pair<ULong64_t, ULong64_t>> GetEntryRanges() override
Retrieve from podio::DataSource a set of ranges of entries that can be processed concurrently.
-
void InitSlot(unsigned int slot, ULong64_t firstEntry) override
Inform podio::DataSource that a certain thread is about to start working on a certain range of entries.
-
bool SetEntry(unsigned int slot, ULong64_t entry) override
Inform podio::DataSource that a certain thread is about to start working on a certain entry.
-
void FinalizeSlot(unsigned int slot) override
Inform podio::DataSource that a certain thread finished working on a certain range of entries.
-
void Finalize() override
Inform podio::DataSource that an event-loop finished.
-
const std::vector<std::string> &GetColumnNames() const override
Returns a reference to the collection of the dataset’s column names.
-
bool HasColumn(std::string_view columnName) const override
Checks if the dataset has a certain column.
-
std::string GetTypeName(std::string_view columnName) const override
Type of a column as a string.
Required for JITting.
-
inline std::string GetLabel() override
Protected Functions
-
std::vector<void*> GetColumnReadersImpl(std::string_view name, const std::type_info &typeInfo) override
Type-erased vector of pointers to pointers to column values — one per slot.
-
inline std::string AsString() override
Private Functions
-
void SetupInput(int nEvents, const std::vector<std::string> &collsToRead)
Setup input for the podio::DataSource.
- Parameters:
nEvents – [in] Number of events.
collsToRead – [in] List of collection names that should be made available.
Private Members
-
unsigned int m_nSlots = 1
Number of slots/threads.
-
std::vector<std::string> m_filePathList = {}
Input file paths.
-
ULong64_t m_nEvents = 0
Total number of events.
-
std::vector<std::pair<ULong64_t, ULong64_t>> m_rangesAvailable = {}
Ranges of events available to be processed.
-
std::vector<std::pair<ULong64_t, ULong64_t>> m_rangesAll = {}
All ranges of events ever created.
-
std::vector<std::string> m_columnNames = {}
Column names.
-
std::vector<std::string> m_columnTypes = {}
Column types.
-
std::vector<std::vector<const podio::CollectionBase*>> m_Collections = {}
Collections, m_Collections[columnIndex][slotIndex].
-
std::vector<unsigned int> m_activeCollections = {}
Active collections.
-
explicit DataSource(const std::string &filePath, int nEvents = -1, const std::vector<std::string> &collsToRead = {})
-
ROOT::RDataFrame CreateDataFrame(const std::vector<std::string> &filePathList, const std::vector<std::string> &collsToRead = {})