REST-for-Physics  v2.3
Rare Event Searches ToolKit for Physics
TRestDataSet.h
1/*************************************************************************
2 * This file is part of the REST software framework. *
3 * *
4 * Copyright (C) 2016 GIFNA/TREX (University of Zaragoza) *
5 * For more information see https://gifna.unizar.es/trex *
6 * *
7 * REST is free software: you can redistribute it and/or modify *
8 * it under the terms of the GNU General Public License as published by *
9 * the Free Software Foundation, either version 3 of the License, or *
10 * (at your option) any later version. *
11 * *
12 * REST is distributed in the hope that it will be useful, *
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
15 * GNU General Public License for more details. *
16 * *
17 * You should have a copy of the GNU General Public License along with *
18 * REST in $REST_PATH/LICENSE. *
19 * If not, see https://www.gnu.org/licenses/. *
20 * For the list of contributors see $REST_PATH/CREDITS. *
21 *************************************************************************/
22
23#ifndef REST_TRestDataSet
24#define REST_TRestDataSet
25
26#include <TTimeStamp.h>
27
28#include <ROOT/RDataFrame.hxx>
29
30#include "TRestCut.h"
31#include "TRestMetadata.h"
32
35 public:
// Fields describing one relevant quantity stored together with the dataset.
// NOTE(review): the enclosing struct header (source line 36) is missing from this listing.

/// The associated metadata member used to register the relevant quantity.
38 std::string metadata;
39
/// It determines how to produce the relevant quantity (accumulate/unique/last/max/min).
41 std::string strategy;
42
/// A user given description that can be used to define the relevant quantity.
44 std::string description;
45
/// The quantity value.
47 std::string value;
48 };
49
50 private:
/// All the selected runs will have a starting date after this value.
52 std::string fFilterStartTime = "2000/01/01"; //<
53
/// All the selected runs will have an ending date before this value.
55 std::string fFilterEndTime = "3000/12/31"; //<
56
/// A glob file pattern that must be satisfied by all files.
58 std::string fFilePattern = ""; //<
59
/// It contains a list of the observables that will be added to the final tree or exported file.
61 std::vector<std::string> fObservablesList; //<
62
/// It contains a list of the processes for which all observables should be added.
64 std::vector<std::string> fProcessObservablesList; //<
65
/// A list of metadata members where filters will be applied.
67 std::vector<std::string> fFilterMetadata; //<
68
/// If not empty it will check if the metadata member contains the string.
70 std::vector<std::string> fFilterContains; //<
71
/// If the corresponding element is not empty it will check if the metadata member is greater.
73 std::vector<Double_t> fFilterGreaterThan; //<
74
/// If the corresponding element is not empty it will check if the metadata member is lower.
76 std::vector<Double_t> fFilterLowerThan; //<
77
/// If the corresponding element is not empty it will check if the metadata member is equal.
79 std::vector<Double_t> fFilterEqualsTo; //<
80
/// The properties of a relevant quantity that we want to store together with the dataset.
82 std::map<std::string, RelevantQuantity> fQuantity; //<
83
/// Parameter cuts over the selected dataset.
85 TRestCut* fCut = nullptr; //<
86
/// The total integrated run time of selected files.
88 Double_t fTotalDuration = 0; //<
89
/// A list populated by the FileSelection method using the conditions of the dataset.
91 std::vector<std::string> fFileSelection; //<
92
// NOTE(review): source lines 94 and 97 are missing from this listing. They declare
// `Double_t fStartTime` (start time of the first file) and `Double_t fEndTime`
// (end time of the last file), which GetStartTime()/GetEndTime() return.
95
98
/// It keeps track if the generated dataset is a pure dataset or a merged one.
100 Bool_t fMergedDataset = false; //<
101
/// The list of dataset files imported.
103 std::vector<std::string> fImportedFiles; //<
104
/// A list of new columns together with their corresponding expressions added to the dataset.
106 std::vector<std::pair<std::string, std::string>> fColumnNameExpressions; //<
107
/// A flag to enable Multithreading during dataframe generation.
109 Bool_t fMT = false; //<
110
111 // If the dataframe was defined externally it will be true
112 Bool_t fExternal = false; //<
113
/// The resulting RDF::RNode object after initialization (starts as an empty RDataFrame).
115 ROOT::RDF::RNode fDataFrame = ROOT::RDataFrame(0);
116
/// A pointer to the generated tree.
118 TChain* fTree = nullptr;
119
/// Initialization of specific TRestDataSet members through an RML file.
120 void InitFromConfigFile() override;
121
122 protected:
/// Function to determine the filenames that satisfy the dataset conditions.
/// Virtual, so derived classes may provide their own selection.
123 virtual std::vector<std::string> FileSelection();
124
/// It regenerates the tree so that it is an exact copy of the present DataFrame.
// NOTE(review): the role of `finalList` is not visible here; presumably the columns to keep — confirm.
125 void RegenerateTree(std::vector<std::string> finalList = {});
126
127 public:
129 ROOT::RDF::RNode GetDataFrame() const {
130 if (!fExternal && fTree == nullptr)
131 RESTWarning << "DataFrame has not been yet initialized" << RESTendl;
132 return fDataFrame;
133 }
134
135 void EnableMultiThreading(Bool_t enable = true) { fMT = enable; }
136
138 TTree* GetTree() const {
139 if (fTree == nullptr && fExternal) {
140 RESTInfo << "The tree is not accessible. Only GetDataFrame can be used in an externally "
141 "generated dataset"
142 << RESTendl;
143 RESTInfo << "You may write a tree using GetDataFrame()->Snapshot(\"MyTree\", \"output.root\");"
144 << RESTendl;
145 return fTree;
146 }
147
148 if (fTree == nullptr) {
149 RESTError << "Tree has not been yet initialized" << RESTendl;
150 RESTError << "You should invoke TRestDataSet::GenerateDataSet() or " << RESTendl;
151 RESTError << "TRestDataSet::Import( fname ) before trying to access the tree" << RESTendl;
152 }
153 return fTree;
154 }
155
157 size_t GetNumberOfColumns() { return fDataFrame.GetColumnNames().size(); }
158
161
163 std::vector<std::string> GetFileSelection() { return fFileSelection; }
164
166 Double_t GetTotalTimeInSeconds() const { return fTotalDuration; }
167
168 inline auto GetFilterStartTime() const { return fFilterStartTime; }
169 inline auto GetFilterEndTime() const { return fFilterEndTime; }
170 inline auto GetStartTime() const { return fStartTime; }
171 inline auto GetEndTime() const { return fEndTime; }
172 inline auto GetFilePattern() const { return fFilePattern; }
173 inline auto GetObservablesList() const { return fObservablesList; }
174 inline auto GetFileSelection() const { return fFileSelection; }
175 inline auto GetProcessObservablesList() const { return fProcessObservablesList; }
176 inline auto GetFilterMetadata() const { return fFilterMetadata; }
177 inline auto GetFilterContains() const { return fFilterContains; }
178 inline auto GetFilterGreaterThan() const { return fFilterGreaterThan; }
179 inline auto GetFilterLowerThan() const { return fFilterLowerThan; }
180 inline auto GetFilterEqualsTo() const { return fFilterEqualsTo; }
181 inline auto GetQuantity() const { return fQuantity; }
182 inline auto GetAddedColumns() const { return fColumnNameExpressions; }
183 inline auto GetCut() const { return fCut; }
184 inline auto IsMergedDataSet() const { return fMergedDataset; }
185
186 inline void SetObservablesList(const std::vector<std::string>& obsList) { fObservablesList = obsList; }
187 inline void SetFilePattern(const std::string& pattern) { fFilePattern = pattern; }
188 inline void SetQuantity(const std::map<std::string, RelevantQuantity>& quantity) { fQuantity = quantity; }
189
190 void SetTotalTimeInSeconds(Double_t seconds) { fTotalDuration = seconds; }
191 void SetDataFrame(const ROOT::RDF::RNode& dS) {
192 fDataFrame = dS;
193 fExternal = true;
194 }
195
/// This function merges the metadata of another TRestDataSet into the current dataSet.
197 Bool_t Merge(const TRestDataSet& dS);
/// This function imports metadata from a root file.
198 void Import(const std::string& fileName);
// NOTE(review): presumably the multi-file counterpart of Import(fileName) — confirm against the implementation.
199 void Import(std::vector<std::string> fileNames);
/// It will generate an output file with the dataset compilation.
// NOTE(review): `excludeColumns` presumably lists columns left out of the output — confirm.
200 void Export(const std::string& filename, std::vector<std::string> excludeColumns = {});
201
/// This function applies a TRestCut to the dataframe and returns a dataframe with the applied cuts.
202 ROOT::RDF::RNode MakeCut(const TRestCut* cut);
/// This method reduces the number of samples inside the dataset by selecting a range.
203 ROOT::RDF::RNode ApplyRange(size_t from, size_t to);
/// This method returns a RDataFrame node obtained by selecting a range of the dataset.
204 ROOT::RDF::RNode Range(size_t from, size_t to);
/// This function will add a new column to the RDataFrame using the same scheme as the usual RDF::Define.
205 ROOT::RDF::RNode DefineColumn(const std::string& columnName, const std::string& formula);
206
/// It returns the number of entries found inside fDataFrame.
207 size_t GetEntries();
208
/// Prints on screen the information about the metadata members of TRestDataSet.
209 void PrintMetadata() override;
/// This function initializes different parameters from the TRestDataSet.
210 void Initialize() override;
211
/// This function generates the data frame with the filelist and column names (or observables).
212 void GenerateDataSet();
213
/// Default constructor.
214 TRestDataSet();
// NOTE(review): presumably constructs the dataset from the configuration file `cfgFileName`,
// optionally selecting the definition called `name` — confirm against the implementation.
215 TRestDataSet(const char* cfgFileName, const std::string& name = "");
217
// ROOT dictionary macro; the second argument (8) is the class version number.
218 ClassDefOverride(TRestDataSet, 8);
219};
220#endif
A class to help with cut definitions. To be used with TRestAnalysisTree.
Definition: TRestCut.h:31
It allows grouping a number of runs that satisfy given metadata conditions.
Definition: TRestDataSet.h:34
std::vector< std::string > fFilterContains
If not empty it will check if the metadata member contains the string.
Definition: TRestDataSet.h:70
virtual std::vector< std::string > FileSelection()
Function to determine the filenames that satisfy the dataset conditions.
std::vector< Double_t > fFilterLowerThan
If the corresponding element is not empty it will check if the metadata member is lower.
Definition: TRestDataSet.h:76
void PrintMetadata() override
Prints on screen the information about the metadata members of TRestDataSet.
TChain * fTree
A pointer to the generated tree.
Definition: TRestDataSet.h:118
std::vector< std::string > fProcessObservablesList
It contains a list of the processes for which all observables should be added.
Definition: TRestDataSet.h:64
void Import(const std::string &fileName)
This function imports metadata from a ROOT file; it imports metadata info from the previous dataSet whi...
std::map< std::string, RelevantQuantity > fQuantity
The properties of a relevant quantity that we want to store together with the dataset.
Definition: TRestDataSet.h:82
ROOT::RDF::RNode Range(size_t from, size_t to)
This method returns a RDataFrame node with the number of samples inside the dataset by selecting a ra...
std::vector< std::pair< std::string, std::string > > fColumnNameExpressions
A list of new columns together with their corresponding expressions added to the dataset.
Definition: TRestDataSet.h:106
ROOT::RDF::RNode DefineColumn(const std::string &columnName, const std::string &formula)
This function will add a new column to the RDataFrame using the same scheme as the usual RDF::Define ...
Double_t fEndTime
TimeStamp for the end time of the last file.
Definition: TRestDataSet.h:97
ROOT::RDF::RNode fDataFrame
The resulting RDF::RNode object after initialization.
Definition: TRestDataSet.h:115
size_t GetNumberOfBranches()
Number of variables (or observables)
Definition: TRestDataSet.h:160
size_t GetEntries()
It returns the number of entries found inside fDataFrame and prints out a warning if the number of en...
TRestDataSet()
Default constructor.
ROOT::RDF::RNode GetDataFrame() const
Gives access to the RDataFrame.
Definition: TRestDataSet.h:129
Double_t GetTotalTimeInSeconds() const
It returns the accumulated run time in seconds.
Definition: TRestDataSet.h:166
ROOT::RDF::RNode MakeCut(const TRestCut *cut)
This function applies a TRestCut to the dataframe and returns a dataframe with the applied cuts....
void GenerateDataSet()
This function generates the data frame with the filelist and column names (or observables) that have ...
Bool_t fMT
A flag to enable Multithreading during dataframe generation.
Definition: TRestDataSet.h:109
TRestCut * fCut
Parameter cuts over the selected dataset.
Definition: TRestDataSet.h:85
void Export(const std::string &filename, std::vector< std::string > excludeColumns={})
It will generate an output file with the dataset compilation. Only the selected branches and the file...
std::string fFilterStartTime
All the selected runs will have a starting date after fFilterStartTime.
Definition: TRestDataSet.h:52
Bool_t Merge(const TRestDataSet &dS)
This function merges the metadata of a different TRestDataSet into the current dataSet.
std::vector< std::string > fFilterMetadata
A list of metadata members where filters will be applied.
Definition: TRestDataSet.h:67
std::vector< std::string > fFileSelection
A list populated by the FileSelection method using the conditions of the dataset.
Definition: TRestDataSet.h:91
std::vector< std::string > GetFileSelection()
It returns a list of the files that have been finally selected.
Definition: TRestDataSet.h:163
std::string fFilterEndTime
All the selected runs will have an ending date before fFilterEndTime.
Definition: TRestDataSet.h:55
Double_t fStartTime
TimeStamp for the start time of the first file.
Definition: TRestDataSet.h:94
std::vector< std::string > fObservablesList
It contains a list of the observables that will be added to the final tree or exported file.
Definition: TRestDataSet.h:61
TTree * GetTree() const
Gives access to the tree.
Definition: TRestDataSet.h:138
Bool_t fMergedDataset
It keeps track if the generated dataset is a pure dataset or a merged one.
Definition: TRestDataSet.h:100
void Initialize() override
This function initializes different parameters from the TRestDataSet.
void RegenerateTree(std::vector< std::string > finalList={})
It regenerates the tree so that it is an exact copy of the present DataFrame.
std::vector< std::string > fImportedFiles
The list of dataset files imported.
Definition: TRestDataSet.h:103
Double_t fTotalDuration
The total integrated run time of selected files.
Definition: TRestDataSet.h:88
std::string fFilePattern
A glob file pattern that must be satisfied by all files.
Definition: TRestDataSet.h:58
size_t GetNumberOfColumns()
Number of variables (or observables)
Definition: TRestDataSet.h:157
std::vector< Double_t > fFilterGreaterThan
If the corresponding element is not empty it will check if the metadata member is greater.
Definition: TRestDataSet.h:73
ROOT::RDF::RNode ApplyRange(size_t from, size_t to)
This method reduces the number of samples inside the dataset by selecting a range.
std::vector< Double_t > fFilterEqualsTo
If the corresponding element is not empty it will check if the metadata member is equal.
Definition: TRestDataSet.h:79
void InitFromConfigFile() override
Initialization of specific TRestDataSet members through an RML file.
TRestDataSet & operator=(TRestDataSet &dS)
Operator to copy TRestDataSet metadata.
~TRestDataSet()
Default destructor.
A base class for any REST metadata class.
Definition: TRestMetadata.h:74
endl_t RESTendl
Termination flag object for TRestStringOutput.
time_t StringToTimeStamp(std::string time)
A method to convert a date/time formatted string to a timestamp.
std::string metadata
The associated metadata member used to register the relevant quantity.
Definition: TRestDataSet.h:38
std::string description
A user given description that can be used to define the relevant quantity.
Definition: TRestDataSet.h:44
std::string strategy
It determines how to produce the relevant quantity (accumulate/unique/last/max/min)
Definition: TRestDataSet.h:41
std::string value
The quantity value.
Definition: TRestDataSet.h:47