85#include "TRestComponentDataSet.h"
144 RESTMetadata <<
" == Dataset filenames ==" <<
RESTendl;
152 RESTMetadata <<
"This component has no nodes!" <<
RESTendl;
153 RESTMetadata <<
" Use: LoadDataSets() to initialize the nodes" <<
RESTendl;
158 RESTMetadata <<
" == Weights ==" <<
RESTendl;
165 RESTMetadata <<
" Use : PrintStatistics() to check node statistics" <<
RESTendl;
175 if (!
HasNodes() && !IsDataSetLoaded()) {
176 RESTWarning <<
"TRestComponentDataSet::PrintStatistics. Empty nodes and no dataset loaded!"
178 RESTWarning <<
"Invoking TRestComponentDataSet::Initialize() might solve the problem" <<
RESTendl;
183 RESTInfo <<
"Total counts : " << result <<
RESTendl;
184 std::cout << std::endl;
201 while (ele !=
nullptr) {
220 <<
"TRestComponentDataSet::FillHistograms. Trying to fill histograms but no variables found!"
227 if (!IsDataSetLoaded()) {
228 RESTError <<
"TRestComponentDataSet::FillHistograms. Dataset has not been initialized!" <<
RESTendl;
233 RESTWarning <<
"Nodes have not been defined" <<
RESTendl;
234 RESTWarning <<
"The full dataset will be used to generate the density distribution" <<
RESTendl;
238 RESTInfo <<
"Generating N-dim histograms" <<
RESTendl;
249 ROOT::RDF::RNode df = ROOT::RDataFrame(0);
253 RESTInfo <<
"Creating component with no parameters (full dataset used)" <<
RESTendl;
266 Int_t* bins =
new Int_t[
fNbins.size()];
267 Double_t* xmin =
new Double_t[
fNbins.size()];
268 Double_t* xmax =
new Double_t[
fNbins.size()];
270 for (
size_t n = 0; n <
fNbins.size(); n++) {
279 std::vector<std::string> varsAndWeight =
fVariables;
282 std::string weightsStr =
"";
283 for (
size_t n = 0; n <
fWeights.size(); n++) {
284 if (n > 0) weightsStr +=
"*";
288 df = df.Define(
"componentWeight", weightsStr);
289 varsAndWeight.push_back(
"componentWeight");
292 auto hn = df.HistoND({hName, hName, (int)
fNbins.size(), bins, xmin, xmax}, varsAndWeight);
293 THnD* hNd =
new THnD(*hn);
312 RESTError <<
"TRestComponentDataSet::RegenerateActiveNode. Active node undefined!" <<
RESTendl;
324 Double_t node = GetActiveNodeValue();
327 ROOT::RDF::RNode df = ROOT::RDataFrame(0);
334 Int_t* bins =
new Int_t[
fNbins.size()];
335 Double_t* xmin =
new Double_t[
fNbins.size()];
336 Double_t* xmax =
new Double_t[
fNbins.size()];
338 for (
size_t n = 0; n <
fNbins.size(); n++) {
347 std::vector<std::string> varsAndWeight =
fVariables;
350 std::string weightsStr =
"";
351 for (
size_t n = 0; n <
fWeights.size(); n++) {
352 if (n > 0) weightsStr +=
"*";
356 df = df.Define(
"componentWeight", weightsStr);
357 varsAndWeight.push_back(
"componentWeight");
360 auto hn = df.HistoND({hName, hName, (int)
fNbins.size(), bins, xmin, xmax}, varsAndWeight);
361 THnD* hNd =
new THnD(*hn);
377 RESTInfo <<
"Extracting parameterization nodes" <<
RESTendl;
379 std::vector<double> vs;
380 if (!IsDataSetLoaded()) {
381 RESTError <<
"TRestComponentDataSet::ExtractParameterizationNodes. Dataset has not been initialized!"
387 for (
const auto v : parValues) vs.push_back(v);
389 std::vector<double>::iterator ip;
390 ip = std::unique(vs.begin(), vs.begin() + vs.size());
391 vs.resize(std::distance(vs.begin(), ip));
392 std::sort(vs.begin(), vs.end());
393 ip = std::unique(vs.begin(), vs.end());
394 vs.resize(std::distance(vs.begin(), ip));
414 std::vector<Int_t> stats;
415 if (!IsDataSetLoaded()) {
416 RESTError <<
"TRestComponentDataSet::ExtractNodeStatistics. Dataset has not been initialized!"
421 RESTInfo <<
"Counting statistics for each node ..." <<
RESTendl;
431 RESTInfo <<
"Total entries for " <<
fParameter <<
":" << p <<
" = " << *nEv <<
RESTendl;
437 RESTWarning <<
"The number of requested samples (" <<
fSamples
438 <<
") is higher than the number of dataset entries (" << *nEv <<
")" <<
RESTendl;
440 RESTInfo <<
"Samples to be used for " <<
fParameter <<
":" << p <<
" = " << *nEv <<
RESTendl;
441 stats.push_back(*nEv);
457 RESTInfo <<
"Loading datasets" <<
RESTendl;
459 std::vector<std::string> fullFileNames;
467 if (fileName.empty()) {
468 RESTError <<
"TRestComponentDataSet::LoadDataSet. Error loading file : " << name <<
RESTendl;
469 RESTError <<
"Does the file exist?" <<
RESTendl;
470 RESTError <<
"You may use `<globals> <searchPath ...` to indicate the path location" <<
RESTendl;
473 fullFileNames.push_back(fileName);
480 RESTError <<
"Problem loading dataset from file list :" <<
RESTendl;
504 if (std::count(cNames.begin(), cNames.end(), var) == 0) {
505 RESTError <<
"Variable ---> " << var <<
" <--- NOT found on dataset" <<
RESTendl;
519 if (std::count(cNames.begin(), cNames.end(), var) == 0) {
520 RESTError <<
"Weight ---> " << var <<
" <--- NOT found on dataset" <<
RESTendl;
531 if (!IsDataSetLoaded()) {
532 RESTWarning <<
"TRestComponentDataSet::ValidDataSet. Dataset has not been loaded" <<
RESTendl;
533 RESTWarning <<
"Try calling TRestComponentDataSet::Initialize()" <<
RESTendl;
535 RESTInfo <<
"Trying to load datasets" <<
RESTendl;
537 if (IsDataSetLoaded()) {
540 RESTError <<
"Failed loading datasets" <<
RESTendl;
546 RESTError <<
"TRestComponentDataSet::ValidDataSet. Active node has not been defined" <<
RESTendl;
It defines a background/signal model distribution in a given parameter space (typically x,...
Bool_t ValidDataSet()
Takes care of initializing the datasets if they have not been initialized. On success it returns true.
TRestDataSet fDataSet
The dataset used to initialize the distribution.
void PrintMetadata() override
Prints on screen the information about the metadata members of TRestComponentDataSet.
Bool_t fDataSetLoaded
It is true if the dataset was loaded without issues.
void FillHistograms() override
It will produce a histogram with the distribution defined using the variables and the weights for eac...
std::vector< Int_t > fNSimPerNode
std::vector< std::string > fWeights
A list with the dataset columns used to weight the distribution density and define rate.
Bool_t LoadDataSets()
A method responsible for importing a list of TRestDataSet into fDataSet and checking that the variables and ...
Bool_t VariablesOk()
It returns true if all variables have been found inside TRestDataSet.
void PrintStatistics()
It prints out the statistics available for each parametric node.
TRestComponentDataSet()
Default constructor.
~TRestComponentDataSet()
Default destructor.
void RegenerateActiveNodeDensity() override
It will regenerate the density histogram for the active node. It is practical in the case when the nu...
std::vector< Int_t > fTotalSamples
It defines the total number of entries for each parameterization node (Initialized by the dataset)
std::vector< Double_t > ExtractParameterizationNodes()
It returns a vector with all the different values found on the dataset column for the user given para...
void Initialize() override
It will initialize the data frame with the filelist and column names (or observables) that have been ...
Bool_t WeightsOk()
It returns true if all weights have been found inside TRestDataSet.
std::vector< std::string > fDataSetFileNames
The filenames of the datasets used.
std::vector< Int_t > ExtractNodeStatistics()
It returns a vector with the number of entries found for each parameterization node.
void InitFromConfigFile() override
It customizes the retrieval of XML data values of this class.
It defines a background/signal model distribution in a given parameter space (typically x,...
void InitFromConfigFile() override
It customizes the retrieval of XML data values of this class.
void PrintMetadata() override
Prints on screen the information about the metadata members of TRestComponent.
Int_t fActiveNode
It is used to define the node that will be accessed for rate retrieval.
Int_t fSamples
It introduces a fixed number of samples (if 0 it will take all available samples)
std::string fParameter
It is used to parameterize a set of distribution densities (e.g. WIMP or axion mass)
Float_t fPrecision
A precision used to select the node value with a given range defined as a fraction of the value.
std::vector< Int_t > fNbins
The number of bins in which we should divide each variable.
void Initialize() override
It initializes the random number generator. We avoid defining the section name here since we will never define...
std::vector< TVector2 > fRanges
The range of each of the variables used to create the PDF distribution.
std::vector< Double_t > fParameterizationNodes
It defines the nodes of the parameterization (Initialized by the dataset)
Bool_t HasNodes()
It returns true if any nodes have been defined.
TRandom3 * fRandom
Internal process random generator.
std::vector< std::string > fVariables
A list with the branches that will be used to create the distribution space.
std::vector< THnD * > fNodeDensity
The generated N-dimensional variable space density for a given node.
void PrintMetadata() override
Prints on screen the information about the metadata members of TRestDataSet.
void Import(const std::string &fileName)
This function imports metadata from a ROOT file; it imports metadata info from the previous dataSet whi...
ROOT::RDF::RNode GetDataFrame() const
Gives access to the RDataFrame.
TTree * GetTree() const
Gives access to the tree.
@ REST_Info
+show most of the information for each step
std::string DoubleToString(Double_t d, std::string format="%8.6e")
Gets a string from a double.