REST-for-Physics  v2.3
Rare Event Searches ToolKit for Physics
TRestDataSet.cxx
1/*************************************************************************
2 * This file is part of the REST software framework. *
3 * *
4 * Copyright (C) 2016 GIFNA/TREX (University of Zaragoza) *
5 * For more information see https://gifna.unizar.es/trex *
6 * *
7 * REST is free software: you can redistribute it and/or modify *
8 * it under the terms of the GNU General Public License as published by *
9 * the Free Software Foundation, either version 3 of the License, or *
10 * (at your option) any later version. *
11 * *
12 * REST is distributed in the hope that it will be useful, *
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
15 * GNU General Public License for more details. *
16 * *
17 * You should have a copy of the GNU General Public License along with *
18 * REST in $REST_PATH/LICENSE. *
19 * If not, see https://www.gnu.org/licenses/. *
20 * For the list of contributors see $REST_PATH/CREDITS. *
21 *************************************************************************/
22
293#include "TRestDataSet.h"
294
295#include "TRestRun.h"
296#include "TRestTools.h"
297
298ClassImp(TRestDataSet);
299
304
319TRestDataSet::TRestDataSet(const char* cfgFileName, const std::string& name) : TRestMetadata(cfgFileName) {
321}
322
327
332void TRestDataSet::Initialize() { SetSectionName(this->ClassName()); }
333
339 if (fTree != nullptr) {
340 RESTWarning << "Tree has already been loaded. Skipping TRestDataSet::GenerateDataSet ... "
341 << RESTendl;
342 return;
343 }
344
345 if (fFileSelection.empty()) FileSelection();
346
347 // We are not ready yet
348 if (fFileSelection.empty()) {
349 RESTError << "File selection is empty " << RESTendl;
350 return;
351 }
352
354 TRestRun run(fFileSelection.front());
355 std::vector<std::string> finalList;
356 finalList.push_back("runOrigin");
357 finalList.push_back("eventID");
358 finalList.push_back("timeStamp");
359
360 auto obsNames = run.GetAnalysisTree()->GetObservableNames();
361 for (const auto& obs : fObservablesList) {
362 if (std::find(obsNames.begin(), obsNames.end(), obs) != obsNames.end()) {
363 finalList.push_back(obs);
364 } else {
365 RESTWarning << " Observable " << obs << " not found in observable list, skipping..." << RESTendl;
366 }
367 }
368
369 for (const auto& name : obsNames) {
370 for (const auto& pcs : fProcessObservablesList) {
371 if (name.find(pcs) == 0) finalList.push_back(name);
372 }
373 }
374
375 // Remove duplicated observables if any
376 std::sort(finalList.begin(), finalList.end());
377 finalList.erase(std::unique(finalList.begin(), finalList.end()), finalList.end());
378
379 if (fMT)
380 ROOT::EnableImplicitMT();
381 else
382 ROOT::DisableImplicitMT();
383
384 RESTInfo << "Initializing dataset" << RESTendl;
385 fDataFrame = ROOT::RDataFrame("AnalysisTree", fFileSelection);
386
387 RESTInfo << "Making cuts" << RESTendl;
389
390 // Adding new user columns added to the dataset
391 for (const auto& [cName, cExpression] : fColumnNameExpressions) {
392 RESTInfo << "Adding column to dataset: " << cName << RESTendl;
393 finalList.emplace_back(cName);
394 fDataFrame = DefineColumn(cName, cExpression);
395 }
396
397 RegenerateTree(finalList);
398
399 RESTInfo << " - Dataset generated!" << RESTendl;
400}
401
405void TRestDataSet::RegenerateTree(std::vector<std::string> finalList) {
406 RESTInfo << "Generating snapshot." << RESTendl;
407 std::string user = getenv("USER");
408 std::string fOutName = "/tmp/rest_output_" + user + ".root";
409 if (!finalList.empty())
410 fDataFrame.Snapshot("AnalysisTree", fOutName, finalList);
411 else
412 fDataFrame.Snapshot("AnalysisTree", fOutName);
413
414 RESTInfo << "Re-importing analysis tree." << RESTendl;
415 fDataFrame = ROOT::RDataFrame("AnalysisTree", fOutName);
416
417 TFile* f = TFile::Open(fOutName.c_str());
418 fTree = (TChain*)f->Get("AnalysisTree");
419}
420
424std::vector<std::string> TRestDataSet::FileSelection() {
425 fFileSelection.clear();
426
427 std::time_t time_stamp_start = REST_StringHelper::StringToTimeStamp(fFilterStartTime);
428 std::time_t time_stamp_end = REST_StringHelper::StringToTimeStamp(fFilterEndTime);
429
430 if (!time_stamp_end || !time_stamp_start) {
431 RESTError << "TRestDataSet::FileSelect. Start or end dates not properly formed. Please, check "
432 "REST_StringHelper::StringToTimeStamp documentation for valid formats"
433 << RESTendl;
434 return fFileSelection;
435 }
436
437 std::vector<std::string> fileNames = TRestTools::GetFilesMatchingPattern(fFilePattern);
438
439 RESTInfo << "TRestDataSet::FileSelection. Starting file selection." << RESTendl;
440 RESTInfo << "Total files : " << fileNames.size() << RESTendl;
441 RESTInfo << "This process may take long computation time in case there are many files." << RESTendl;
442
443 fTotalDuration = 0;
444 std::cout << "Processing file selection.";
445 int cnt = 1;
446 for (const auto& file : fileNames) {
447 if (cnt % 100 == 0) {
448 std::cout << std::endl;
449 std::cout << "Files processed: " << cnt << " ." << std::flush;
450 }
451 cnt++;
452 TRestRun run(file);
453 std::cout << "." << std::flush;
454 double runStart = run.GetStartTimestamp();
455 double runEnd = run.GetEndTimestamp();
456
457 if (runStart < time_stamp_start || runEnd > time_stamp_end) {
458 RESTInfo << "Rejecting file out of date range: " << file << RESTendl;
459 continue;
460 }
461
462 int n = 0;
463 bool accept = true;
464 for (const auto& md : fFilterMetadata) {
465 std::string mdValue = run.GetMetadataMember(md);
466
467 if (!fFilterContains[n].empty())
468 if (mdValue.find(fFilterContains[n]) == std::string::npos) accept = false;
469
470 if (fFilterGreaterThan[n] != -1) {
471 if (StringToDouble(mdValue) <= fFilterGreaterThan[n]) accept = false;
472 }
473
474 if (fFilterLowerThan[n] != -1)
475 if (StringToDouble(mdValue) >= fFilterLowerThan[n]) accept = false;
476
477 if (fFilterEqualsTo[n] != -1)
478 if (StringToDouble(mdValue) != fFilterEqualsTo[n]) accept = false;
479
480 n++;
481 }
482
483 if (!accept) continue;
484
485 Double_t acc = 0;
486 for (auto& [name, properties] : fQuantity) {
487 std::string value = run.ReplaceMetadataMembers(properties.metadata);
488 const Double_t val = REST_StringHelper::StringToDouble(value);
489
490 if (properties.strategy == "accumulate") {
491 acc += val;
492 properties.value = StringWithPrecision(val, 2);
493 }
494
495 if (properties.strategy == "max")
496 if (properties.value.empty() || REST_StringHelper::StringToDouble(properties.value) < val)
497 properties.value = value;
498
499 if (properties.strategy == "min")
500 if (properties.value.empty() || REST_StringHelper::StringToDouble(properties.value) > val)
501 properties.value = value;
502
503 if (properties.strategy == "unique") {
504 if (properties.value.empty())
505 properties.value = value;
506 else if (properties.value != value) {
507 RESTWarning << "TRestDataSet::FileSelection. Relevant quantity retrieval." << RESTendl;
508 RESTWarning << "A unique metadata member used for the `" << name
509 << "` quantity is not unique!!" << RESTendl;
510 RESTWarning << "Pre-registered value : " << properties.value << " New value : " << value
511 << RESTendl;
512 }
513 }
514
515 if (properties.strategy == "last") properties.value = value;
516 }
517
518 if (run.GetStartTimestamp() < fStartTime) fStartTime = run.GetStartTimestamp();
519
520 if (run.GetEndTimestamp() > fEndTime) fEndTime = run.GetEndTimestamp();
521
522 fTotalDuration += run.GetEndTimestamp() - run.GetStartTimestamp();
523 fFileSelection.push_back(file);
524 }
525 std::cout << std::endl;
526
527 return fFileSelection;
528}
529
536ROOT::RDF::RNode TRestDataSet::Range(size_t from, size_t to) { return fDataFrame.Range(from, to); }
537
542ROOT::RDF::RNode TRestDataSet::ApplyRange(size_t from, size_t to) {
543 fDataFrame = fDataFrame.Range(from, to);
545 return fDataFrame;
546}
547
554ROOT::RDF::RNode TRestDataSet::MakeCut(const TRestCut* cut) {
555 auto df = fDataFrame;
556
557 if (cut == nullptr) return df;
558
559 auto paramCut = cut->GetParamCut();
560 auto obsList = df.GetColumnNames();
561 for (const auto& [param, condition] : paramCut) {
562 if (std::find(obsList.begin(), obsList.end(), param) != obsList.end()) {
563 std::string pCut = param + condition;
564 RESTDebug << "Applying cut " << pCut << RESTendl;
565 df = df.Filter(pCut);
566 } else {
567 RESTWarning << " Cut observable " << param << " not found in observable list, skipping..."
568 << RESTendl;
569 }
570 }
571
572 auto cutString = cut->GetCutStrings();
573 for (const auto& pCut : cutString) {
574 bool added = false;
575 for (const auto& obs : obsList) {
576 if (pCut.find(obs) != std::string::npos) {
577 RESTDebug << "Applying cut " << pCut << RESTendl;
578 df = df.Filter(pCut);
579 added = true;
580 break;
581 }
582 }
583
584 if (!added) {
585 RESTWarning << " Cut string " << pCut << " not found in observable list, skipping..." << RESTendl;
586 }
587 }
588
589 return df;
590}
591
598 auto nEntries = fDataFrame.Count();
599 if (*nEntries == (long long unsigned int)GetTree()->GetEntries()) return *nEntries;
600 RESTWarning << "TRestDataSet::GetEntries. Number of tree entries is not the same as RDataFrame entries."
601 << RESTendl;
602 RESTWarning << "Returning RDataFrame entries" << RESTendl;
603 return *nEntries;
604}
605
618ROOT::RDF::RNode TRestDataSet::DefineColumn(const std::string& columnName, const std::string& formula) {
619 auto df = fDataFrame;
620
621 std::string evalFormula = formula;
622 for (auto const& [name, properties] : fQuantity)
623 evalFormula = REST_StringHelper::Replace(evalFormula, name, properties.value);
624
625 df = df.Define(columnName, evalFormula);
626
627 return df;
628}
629
635
636 RESTMetadata << " - StartTime : " << REST_StringHelper::ToDateTimeString(fStartTime) << RESTendl;
637 RESTMetadata << " - EndTime : " << REST_StringHelper::ToDateTimeString(fEndTime) << RESTendl;
638 RESTMetadata << " - Path : " << TRestTools::SeparatePathAndName(fFilePattern).first << RESTendl;
639 RESTMetadata << " - File pattern : " << TRestTools::SeparatePathAndName(fFilePattern).second << RESTendl;
640 RESTMetadata << " " << RESTendl;
641 RESTMetadata << " - Accumulated run time (seconds) : " << fTotalDuration << RESTendl;
642 RESTMetadata << " - Accumulated run time (hours) : " << fTotalDuration / 3600. << RESTendl;
643 RESTMetadata << " - Accumulated run time (days) : " << fTotalDuration / 3600. / 24. << RESTendl;
644
645 RESTMetadata << " " << RESTendl;
646
647 if (!fObservablesList.empty()) {
648 RESTMetadata << " Single observables added:" << RESTendl;
649 RESTMetadata << " -------------------------" << RESTendl;
650 for (const auto& l : fObservablesList) RESTMetadata << " - " << l << RESTendl;
651
652 RESTMetadata << " " << RESTendl;
653 }
654
655 if (!fProcessObservablesList.empty()) {
656 RESTMetadata << " Process observables added: " << RESTendl;
657 RESTMetadata << " -------------------------- " << RESTendl;
658 for (const auto& l : fProcessObservablesList) RESTMetadata << " - " << l << RESTendl;
659
660 RESTMetadata << " " << RESTendl;
661 }
662
663 if (!fFilterMetadata.empty()) {
664 RESTMetadata << " Metadata filters: " << RESTendl;
665 RESTMetadata << " ----------------- " << RESTendl;
666 RESTMetadata << " - StartTime : " << fFilterStartTime << RESTendl;
667 RESTMetadata << " - EndTime : " << fFilterEndTime << RESTendl;
668 int n = 0;
669 for (const auto& mdFilter : fFilterMetadata) {
670 RESTMetadata << " - " << mdFilter << ".";
671
672 if (!fFilterContains[n].empty()) RESTMetadata << " Contains: " << fFilterContains[n];
673 if (fFilterGreaterThan[n] != -1) RESTMetadata << " Greater than: " << fFilterGreaterThan[n];
674 if (fFilterLowerThan[n] != -1) RESTMetadata << " Lower than: " << fFilterLowerThan[n];
675 if (fFilterEqualsTo[n] != -1) RESTMetadata << " Equals to: " << fFilterEqualsTo[n];
676
677 RESTMetadata << RESTendl;
678 n++;
679 }
680
681 RESTMetadata << " " << RESTendl;
682 }
683
684 if (!fQuantity.empty()) {
685 RESTMetadata << " Relevant quantities: " << RESTendl;
686 RESTMetadata << " -------------------- " << RESTendl;
687
688 for (auto const& [name, properties] : fQuantity) {
689 RESTMetadata << " - Name : " << name << ". Value : " << properties.value
690 << ". Strategy: " << properties.strategy << RESTendl;
691 RESTMetadata << " - Metadata: " << properties.metadata << RESTendl;
692 RESTMetadata << " - Description: " << properties.description << RESTendl;
693 RESTMetadata << " " << RESTendl;
694 }
695 }
696
697 if (!fColumnNameExpressions.empty()) {
698 RESTMetadata << " New columns added to generated dataframe: " << RESTendl;
699 RESTMetadata << " ---------------------------------------- " << RESTendl;
700 for (const auto& [cName, cExpression] : fColumnNameExpressions) {
701 RESTMetadata << " - Name : " << cName << RESTendl;
702 RESTMetadata << " - Expression: " << cExpression << RESTendl;
703 RESTMetadata << " " << RESTendl;
704 }
705 }
706
707 if (fMergedDataset) {
708 RESTMetadata << " " << RESTendl;
709 RESTMetadata << "This is a combined dataset." << RESTendl;
710 RESTMetadata << " -------------------- " << RESTendl;
711 RESTMetadata << " - Relevant quantities have been removed!" << RESTendl;
712 RESTMetadata << " - Dataset metadata properties correspond to the first file in the list."
713 << RESTendl;
714 RESTMetadata << " " << RESTendl;
715 RESTMetadata << "List of imported files: " << RESTendl;
716 RESTMetadata << " -------------------- " << RESTendl;
717 for (const auto& fn : fImportedFiles) RESTMetadata << " - " << fn << RESTendl;
718 }
719
720 RESTMetadata << " " << RESTendl;
721 if (fMT)
722 RESTMetadata << " - Multithreading was enabled" << RESTendl;
723 else
724 RESTMetadata << " - Multithreading was NOT enabled" << RESTendl;
725
726 RESTMetadata << "----" << RESTendl;
727}
728
734
736 TiXmlElement* filterDefinition = GetElement("filter");
737 while (filterDefinition != nullptr) {
738 std::string metadata = GetFieldValue("metadata", filterDefinition);
739 if (metadata.empty() || metadata == "Not defined") {
740 RESTError << "Filter key defined without metadata member!" << RESTendl;
741 exit(1);
742 }
743
744 fFilterMetadata.push_back(metadata);
745
746 std::string contains = GetFieldValue("contains", filterDefinition);
747 if (contains == "Not defined") contains = "";
748 Double_t greaterThan = StringToDouble(GetFieldValue("greaterThan", filterDefinition));
749 Double_t lowerThan = StringToDouble(GetFieldValue("lowerThan", filterDefinition));
750 Double_t equalsTo = StringToDouble(GetFieldValue("equalsTo", filterDefinition));
751
752 fFilterContains.push_back(contains);
753 fFilterGreaterThan.push_back(greaterThan);
754 fFilterLowerThan.push_back(lowerThan);
755 fFilterEqualsTo.push_back(equalsTo);
756
757 filterDefinition = GetNextElement(filterDefinition);
758 }
759
761 TiXmlElement* observablesDefinition = GetElement("observables");
762 while (observablesDefinition != nullptr) {
763 std::string observables = GetFieldValue("list", observablesDefinition);
764 if (observables.empty() || observables == "Not defined") {
765 RESTError << "<observables key does not contain a list!" << RESTendl;
766 exit(1);
767 }
768
769 std::vector<std::string> obsList = REST_StringHelper::Split(observables, ",");
770
771 fObservablesList.insert(fObservablesList.end(), obsList.begin(), obsList.end());
772
773 observablesDefinition = GetNextElement(observablesDefinition);
774 }
775
777 TiXmlElement* obsProcessDefinition = GetElement("processObservables");
778 while (obsProcessDefinition != nullptr) {
779 std::string observables = GetFieldValue("list", obsProcessDefinition);
780 if (observables.empty() || observables == "Not defined") {
781 RESTError << "<processObservables key does not contain a list!" << RESTendl;
782 exit(1);
783 }
784
785 std::vector<std::string> obsList = REST_StringHelper::Split(observables, ",");
786
787 for (const auto& l : obsList) fProcessObservablesList.push_back(l);
788
789 obsProcessDefinition = GetNextElement(obsProcessDefinition);
790 }
791
793 TiXmlElement* quantityDefinition = GetElement("quantity");
794 while (quantityDefinition != nullptr) {
795 std::string name = GetFieldValue("name", quantityDefinition);
796 if (name.empty() || name == "Not defined") {
797 RESTError << "<quantity key does not contain a name!" << RESTendl;
798 exit(1);
799 }
800
801 std::string metadata = GetFieldValue("metadata", quantityDefinition);
802 if (metadata.empty() || metadata == "Not defined") {
803 RESTError << "<quantity key does not contain a metadata value!" << RESTendl;
804 exit(1);
805 }
806
807 std::string strategy = GetFieldValue("strategy", quantityDefinition);
808 if (strategy.empty() || strategy == "Not defined") {
809 strategy = "unique";
810 }
811
812 std::string description = GetFieldValue("description", quantityDefinition);
813
814 RelevantQuantity quantity;
815 quantity.metadata = metadata;
816 quantity.strategy = strategy;
817 quantity.description = description;
818 quantity.value = "";
819
820 fQuantity[name] = quantity;
821
822 quantityDefinition = GetNextElement(quantityDefinition);
823 }
824
826 TiXmlElement* columnDefinition = GetElement("addColumn");
827 while (columnDefinition != nullptr) {
828 std::string name = GetFieldValue("name", columnDefinition);
829 if (name.empty() || name == "Not defined") {
830 RESTError << "<define key does not contain a name name!" << RESTendl;
831 exit(1);
832 }
833
834 std::string expression = GetFieldValue("expression", columnDefinition);
835 if (expression.empty() || expression == "Not defined") {
836 RESTError << "<addColumn key does not contain a expression value!" << RESTendl;
837 exit(1);
838 }
839
840 fColumnNameExpressions.push_back({name, expression});
841
842 columnDefinition = GetNextElement(columnDefinition);
843 }
844
845 fCut = (TRestCut*)InstantiateChildMetadata("TRestCut");
846}
847
861void TRestDataSet::Export(const std::string& filename, std::vector<std::string> excludeColumns) {
862 RESTInfo << "Exporting dataset" << RESTendl;
863
864 std::vector<std::string> columns = fDataFrame.GetColumnNames();
865 if (!excludeColumns.empty()) {
866 columns.erase(std::remove_if(columns.begin(), columns.end(),
867 [&excludeColumns](std::string elem) {
868 return std::find(excludeColumns.begin(), excludeColumns.end(),
869 elem) != excludeColumns.end();
870 }),
871 columns.end());
872
873 RESTInfo << "Re-Generating snapshot." << RESTendl;
874 std::string user = getenv("USER");
875 std::string fOutName = "/tmp/rest_output_" + user + ".root";
876 fDataFrame.Snapshot("AnalysisTree", fOutName, columns);
877
878 RESTInfo << "Re-importing analysis tree." << RESTendl;
879 fDataFrame = ROOT::RDataFrame("AnalysisTree", fOutName);
880
881 TFile* f = TFile::Open(fOutName.c_str());
882 fTree = (TChain*)f->Get("AnalysisTree");
883 }
884
885 if (TRestTools::GetFileNameExtension(filename) == "txt" ||
886 TRestTools::GetFileNameExtension(filename) == "csv") {
887 if (excludeColumns.empty()) {
888 RESTInfo << "Re-Generating snapshot." << RESTendl;
889 std::string user = getenv("USER");
890 std::string fOutName = "/tmp/rest_output_" + user + ".root";
891 fDataFrame.Snapshot("AnalysisTree", fOutName);
892
893 TFile* f = TFile::Open(fOutName.c_str());
894 fTree = (TChain*)f->Get("AnalysisTree");
895 }
896
897 std::vector<std::string> dataTypes;
898 for (int n = 0; n < fTree->GetListOfBranches()->GetEntries(); n++) {
899 std::string bName = fTree->GetListOfBranches()->At(n)->GetName();
900 std::string type = fTree->GetLeaf((TString)bName)->GetTypeName();
901 dataTypes.push_back(type);
902 if (type != "Double_t" && type != "Int_t") {
903 RESTError << "Branch name : " << bName << " is type : " << type << RESTendl;
904 RESTError << "Only Int_t and Double_t types are allowed for "
905 "exporting to ASCII table"
906 << RESTendl;
907 RESTError << "File will not be generated" << RESTendl;
908 return;
909 }
910 }
911
912 FILE* f = fopen(filename.c_str(), "wt");
914 fprintf(f, "### TRestDataSet generated file\n");
915 fprintf(f, "### \n");
916 fprintf(f, "### StartTime : %s\n", fFilterStartTime.c_str());
917 fprintf(f, "### EndTime : %s\n", fFilterEndTime.c_str());
918 fprintf(f, "###\n");
919 fprintf(f, "### Accumulated run time (seconds) : %lf\n", fTotalDuration);
920 fprintf(f, "### Accumulated run time (hours) : %lf\n", fTotalDuration / 3600.);
921 fprintf(f, "### Accumulated run time (days) : %lf\n", fTotalDuration / 3600. / 24.);
922 fprintf(f, "###\n");
923 fprintf(f, "### Data path : %s\n", TRestTools::SeparatePathAndName(fFilePattern).first.c_str());
924 fprintf(f, "### File pattern : %s\n", TRestTools::SeparatePathAndName(fFilePattern).second.c_str());
925 fprintf(f, "###\n");
926 if (!fFilterMetadata.empty()) {
927 fprintf(f, "### Metadata filters : \n");
928 int n = 0;
929 for (const auto& md : fFilterMetadata) {
930 fprintf(f, "### - %s.", md.c_str());
931 if (!fFilterContains[n].empty()) fprintf(f, " Contains: %s.", fFilterContains[n].c_str());
932 if (fFilterGreaterThan[n] != -1) fprintf(f, " Greater than: %6.3lf.", fFilterGreaterThan[n]);
933 if (fFilterLowerThan[n] != -1) fprintf(f, " Lower than: %6.3lf.", fFilterLowerThan[n]);
934 if (fFilterEqualsTo[n] != -1) fprintf(f, " Equals to: %6.3lf.", fFilterLowerThan[n]);
935 fprintf(f, "\n");
936 n++;
937 }
938 }
939 fprintf(f, "###\n");
940 fprintf(f, "### Relevant quantities: \n");
941 for (auto& [name, properties] : fQuantity) {
942 fprintf(f, "### - %s : %s - %s\n", name.c_str(), properties.value.c_str(),
943 properties.description.c_str());
944 }
945 fprintf(f, "###\n");
946 fprintf(f, "### Observables list: ");
947 for (int n = 0; n < fTree->GetListOfBranches()->GetEntries(); n++) {
948 std::string bName = fTree->GetListOfBranches()->At(n)->GetName();
949 fprintf(f, " %s", bName.c_str());
950 }
951 fprintf(f, "\n");
952 fprintf(f, "###\n");
953 fprintf(f, "### Data starts here\n");
954
955 auto obsNames = fDataFrame.GetColumnNames();
956 std::string obsListStr = "";
957 for (const auto& l : obsNames) {
958 if (!obsListStr.empty()) obsListStr += ":";
959 obsListStr += l;
960 }
961
962 // We do this so that later we can recover the values using TTree::GetVal
963 fTree->Draw((TString)obsListStr, "", "goff");
964
965 for (unsigned int n = 0; n < fTree->GetEntries(); n++) {
966 for (unsigned int m = 0; m < GetNumberOfBranches(); m++) {
967 std::string bName = fTree->GetListOfBranches()->At(m)->GetName();
968 if (m > 0) fprintf(f, "\t");
969 if (dataTypes[m] == "Double_t")
970 if (bName == "timeStamp")
971 fprintf(f, "%010.0lf", fTree->GetVal(m)[n]);
972 else
973 fprintf(f, "%05.3e", fTree->GetVal(m)[n]);
974 else
975 fprintf(f, "%06d", (Int_t)fTree->GetVal(m)[n]);
976 }
977 fprintf(f, "\n");
978 }
979 fclose(f);
980
981 return;
982 } else if (TRestTools::GetFileNameExtension(filename) == "root") {
983 fDataFrame.Snapshot("AnalysisTree", filename);
984
985 TFile* f = TFile::Open(filename.c_str(), "UPDATE");
986 std::string name = this->GetName();
987 if (name.empty()) name = "mock";
988 this->Write(name.c_str());
989 f->Close();
990 } else {
991 RESTWarning << "TRestDataSet::Export. Extension " << TRestTools::GetFileNameExtension(filename)
992 << " not recognized" << RESTendl;
993 }
994 RESTInfo << "Dataset generated: " << filename << RESTendl;
995}
996
1001 SetName(dS.GetName());
1002 fFilterStartTime = dS.GetFilterStartTime();
1003 fFilterEndTime = dS.GetFilterEndTime();
1004 fStartTime = dS.GetStartTime();
1005 fEndTime = dS.GetEndTime();
1006 fFilePattern = dS.GetFilePattern();
1007 fObservablesList = dS.GetObservablesList();
1009 fProcessObservablesList = dS.GetProcessObservablesList();
1010 fFilterMetadata = dS.GetFilterMetadata();
1011 fFilterContains = dS.GetFilterContains();
1012 fFilterGreaterThan = dS.GetFilterGreaterThan();
1013 fFilterLowerThan = dS.GetFilterLowerThan();
1014 fFilterEqualsTo = dS.GetFilterEqualsTo();
1015 fQuantity = dS.GetQuantity();
1016 fColumnNameExpressions = dS.GetAddedColumns();
1018 fCut = dS.GetCut();
1019
1020 return *this;
1021}
1022
1028 auto obsNames = GetObservablesList();
1029 for (const auto& obs : fObservablesList) {
1030 if (std::find(obsNames.begin(), obsNames.end(), obs) != obsNames.end()) {
1031 RESTError << "Cannot merge dataSets with different observable list " << RESTendl;
1032 return false;
1033 }
1034 }
1035
1036 if (fStartTime > dS.GetStartTime()) fStartTime = dS.GetStartTime();
1037 if (fEndTime < dS.GetEndTime()) fEndTime = dS.GetEndTime();
1038
1039 auto fileSelection = dS.GetFileSelection();
1040 fFileSelection.insert(fFileSelection.end(), fileSelection.begin(), fileSelection.end());
1041
1043
1044 return true;
1045}
1046
1052void TRestDataSet::Import(const std::string& fileName) {
1053 if (TRestTools::GetFileNameExtension(fileName) != "root") {
1054 RESTError << "Datasets can only be imported from root files" << RESTendl;
1055 return;
1056 }
1057
1058 TRestDataSet* dS = nullptr;
1059 TFile* file = TFile::Open(fileName.c_str(), "READ");
1060 if (file != nullptr) {
1061 TIter nextkey(file->GetListOfKeys());
1062 TKey* key;
1063 while ((key = (TKey*)nextkey())) {
1064 std::string kName = key->GetClassName();
1065 if (REST_Reflection::GetClassQuick(kName.c_str()) != nullptr &&
1066 REST_Reflection::GetClassQuick(kName.c_str())->InheritsFrom("TRestDataSet")) {
1067 dS = file->Get<TRestDataSet>(key->GetName());
1068 *this = *dS;
1069 }
1070 }
1071 }
1072
1073 if (dS == nullptr) {
1074 RESTError << fileName << " is not a valid dataSet" << RESTendl;
1075 return;
1076 }
1077
1078 if (fMT)
1079 ROOT::EnableImplicitMT();
1080 else
1081 ROOT::DisableImplicitMT();
1082
1083 fDataFrame = ROOT::RDataFrame("AnalysisTree", fileName);
1084
1085 fTree = (TChain*)file->Get("AnalysisTree");
1086}
1087
1097void TRestDataSet::Import(std::vector<std::string> fileNames) {
1098 for (const auto& fN : fileNames)
1099 if (TRestTools::GetFileNameExtension(fN) != "root") {
1100 RESTError << "Datasets can only be imported from root files" << RESTendl;
1101 return;
1102 }
1103
1104 int count = 0;
1105 auto it = fileNames.begin();
1106 while (it != fileNames.end()) {
1107 std::string fileName = *it;
1108 TFile* file = TFile::Open(fileName.c_str(), "READ");
1109 bool isValid = false;
1110 if (file != nullptr) {
1111 TIter nextkey(file->GetListOfKeys());
1112 TKey* key;
1113 while ((key = (TKey*)nextkey())) {
1114 std::string kName = key->GetClassName();
1115 if (REST_Reflection::GetClassQuick(kName.c_str()) != nullptr &&
1116 REST_Reflection::GetClassQuick(kName.c_str())->InheritsFrom("TRestDataSet")) {
1117 TRestDataSet* dS = file->Get<TRestDataSet>(key->GetName());
1119 dS->PrintMetadata();
1120
1121 if (count == 0) {
1122 *this = *dS;
1123 isValid = true;
1124 } else {
1125 isValid = Merge(*dS);
1126 }
1127
1128 if (isValid) count++;
1129 }
1130 }
1131 } else {
1132 RESTError << "Cannot open " << fileName << RESTendl;
1133 }
1134
1135 if (!isValid) {
1136 RESTError << fileName << " is not a valid dataSet skipping..." << RESTendl;
1137 it = fileNames.erase(it);
1138 } else {
1139 ++it;
1140 }
1141 }
1142
1143 if (fileNames.empty()) {
1144 RESTError << "File selection is empty, dataSet will not be imported " << RESTendl;
1145 return;
1146 }
1147
1148 RESTInfo << "Opening list of files. First file: " << fileNames[0] << RESTendl;
1149 fDataFrame = ROOT::RDataFrame("AnalysisTree", fileNames);
1150
1151 if (fTree != nullptr) {
1152 delete fTree;
1153 fTree = nullptr;
1154 }
1155 fTree = new TChain("AnalysisTree");
1156
1157 for (const auto& fN : fileNames) fTree->Add((TString)fN);
1158
1159 fMergedDataset = true;
1160 fImportedFiles = fileNames;
1161
1162 fQuantity.clear();
1163}
std::vector< std::string > GetObservableNames()
It returns a vector with strings containing all the observables that exist in the analysis tree.
A class to help on cuts definitions. To be used with TRestAnalysisTree.
Definition: TRestCut.h:31
It allows to group a number of runs that satisfy given metadata conditions.
Definition: TRestDataSet.h:34
std::vector< std::string > fFilterContains
If not empty it will check if the metadata member contains the string.
Definition: TRestDataSet.h:70
virtual std::vector< std::string > FileSelection()
Function to determine the filenames that satisfy the dataset conditions.
std::vector< Double_t > fFilterLowerThan
If the corresponding element is not empty it will check if the metadata member is lower.
Definition: TRestDataSet.h:76
void PrintMetadata() override
Prints on screen the information about the metadata members of TRestDataSet.
TChain * fTree
A pointer to the generated tree.
Definition: TRestDataSet.h:118
std::vector< std::string > fProcessObservablesList
It contains a list of the process where all observables should be added.
Definition: TRestDataSet.h:64
void Import(const std::string &fileName)
This function imports metadata from a root file it import metadata info from the previous dataSet whi...
std::map< std::string, RelevantQuantity > fQuantity
The properties of a relevant quantity that we want to store together with the dataset.
Definition: TRestDataSet.h:82
ROOT::RDF::RNode Range(size_t from, size_t to)
This method returns a RDataFrame node with the number of samples inside the dataset by selecting a ra...
std::vector< std::pair< std::string, std::string > > fColumnNameExpressions
A list of new columns together with its corresponding expressions added to the dataset.
Definition: TRestDataSet.h:106
ROOT::RDF::RNode DefineColumn(const std::string &columnName, const std::string &formula)
This function will add a new column to the RDataFrame using the same scheme as the usual RDF::Define ...
Double_t fEndTime
TimeStamp for the end time of the last file.
Definition: TRestDataSet.h:97
ROOT::RDF::RNode fDataFrame
The resulting RDF::RNode object after initialization.
Definition: TRestDataSet.h:115
size_t GetNumberOfBranches()
Number of variables (or observables)
Definition: TRestDataSet.h:160
size_t GetEntries()
It returns the number of entries found inside fDataFrame and prints out a warning if the number of en...
TRestDataSet()
Default constructor.
Double_t GetTotalTimeInSeconds() const
It returns the accumulated run time in seconds.
Definition: TRestDataSet.h:166
ROOT::RDF::RNode MakeCut(const TRestCut *cut)
This function applies a TRestCut to the dataframe and returns a dataframe with the applied cuts....
void GenerateDataSet()
This function generates the data frame with the filelist and column names (or observables) that have ...
Bool_t fMT
A flag to enable Multithreading during dataframe generation.
Definition: TRestDataSet.h:109
TRestCut * fCut
Parameter cuts over the selected dataset.
Definition: TRestDataSet.h:85
void Export(const std::string &filename, std::vector< std::string > excludeColumns={})
It will generate an output file with the dataset compilation. Only the selected branches and the file...
std::string fFilterStartTime
All the selected runs will have a starting date after fStartTime.
Definition: TRestDataSet.h:52
Bool_t Merge(const TRestDataSet &dS)
This function merge different TRestDataSet metadata in current dataSet.
std::vector< std::string > fFilterMetadata
A list of metadata members where filters will be applied.
Definition: TRestDataSet.h:67
std::vector< std::string > fFileSelection
A list populated by the FileSelection method using the conditions of the dataset.
Definition: TRestDataSet.h:91
std::vector< std::string > GetFileSelection()
It returns a list of the files that have been finally selected.
Definition: TRestDataSet.h:163
std::string fFilterEndTime
All the selected runs will have an ending date before fEndTime.
Definition: TRestDataSet.h:55
Double_t fStartTime
TimeStamp for the start time of the first file.
Definition: TRestDataSet.h:94
std::vector< std::string > fObservablesList
It contains a list of the observables that will be added to the final tree or exported file.
Definition: TRestDataSet.h:61
TTree * GetTree() const
Gives access to the tree.
Definition: TRestDataSet.h:138
Bool_t fMergedDataset
It keeps track if the generated dataset is a pure dataset or a merged one.
Definition: TRestDataSet.h:100
void Initialize() override
This function initialize different parameters from the TRestDataSet.
void RegenerateTree(std::vector< std::string > finalList={})
It regenerates the tree so that it is an exact copy of the present DataFrame.
std::vector< std::string > fImportedFiles
The list of dataset files imported.
Definition: TRestDataSet.h:103
Double_t fTotalDuration
The total integrated run time of selected files.
Definition: TRestDataSet.h:88
std::string fFilePattern
A glob file pattern that must be satisfied by all files.
Definition: TRestDataSet.h:58
std::vector< Double_t > fFilterGreaterThan
If the corresponding element is not empty it will check if the metadata member is greater.
Definition: TRestDataSet.h:73
ROOT::RDF::RNode ApplyRange(size_t from, size_t to)
This method reduces the number of samples inside the dataset by selecting a range.
std::vector< Double_t > fFilterEqualsTo
If the corresponding element is not empty it will check if the metadata member is equal.
Definition: TRestDataSet.h:79
void InitFromConfigFile() override
Initialization of specific TRestDataSet members through an RML file.
TRestDataSet & operator=(TRestDataSet &dS)
Operator to copy TRestDataSet metadata.
~TRestDataSet()
Default destructor.
A base class for any REST metadata class.
Definition: TRestMetadata.h:74
virtual void PrintMetadata()
Implemented it in the derived metadata class to print out specific metadata information.
endl_t RESTendl
Termination flag object for TRestStringOutput.
TiXmlElement * GetElement(std::string eleDeclare, TiXmlElement *e=nullptr)
Get an xml element from a given parent element, according to its declaration.
Int_t LoadConfigFromFile(const std::string &configFilename, const std::string &sectionName="")
Give the file name, find out the corresponding section. Then call the main starter.
TRestMetadata * InstantiateChildMetadata(int index, std::string pattern="")
This method will retrieve a new TRestMetadata instance of a child element of the present TRestMetadat...
virtual void InitFromConfigFile()
To make settings from rml file. This method must be implemented in the derived class.
TRestStringOutput::REST_Verbose_Level GetVerboseLevel()
returns the verboselevel in type of REST_Verbose_Level enumerator
std::string GetFieldValue(std::string parName, TiXmlElement *e)
Returns the field value of an xml element which has the specified name.
void SetSectionName(std::string sName)
set the section name, clear the section content
std::string fConfigFileName
Full name of the rml file.
virtual Int_t Write(const char *name=nullptr, Int_t option=0, Int_t bufsize=0)
overwriting the write() method with fStore considered
TiXmlElement * GetNextElement(TiXmlElement *e)
Get the next sibling xml element of this element, with same eleDeclare.
Data provider and manager in REST.
Definition: TRestRun.h:18
std::string ReplaceMetadataMembers(const std::string &instr, Int_t precision=8)
It will replace the data members contained inside the string given as input. The data members in the ...
Definition: TRestRun.cxx:1652
@ REST_Info
+show most of the information for each steps
static std::pair< std::string, std::string > SeparatePathAndName(const std::string &fullname)
Separate path and filename in a full path+filename string, returns a pair of string.
Definition: TRestTools.cxx:813
static std::string GetFileNameExtension(const std::string &fullname)
Gets the file extension as the substring found after the latest ".".
Definition: TRestTools.cxx:823
static std::vector< std::string > GetFilesMatchingPattern(std::string pattern, bool unlimited=false)
Returns a list of files whose name match the pattern string. Key word is "*". e.g....
Definition: TRestTools.cxx:976
TClass * GetClassQuick(std::string type)
time_t StringToTimeStamp(std::string time)
A method to convert a date/time formatted string to a timestamp.
std::vector< std::string > Split(std::string in, std::string separator, bool allowBlankString=false, bool removeWhiteSpaces=false, int startPos=-1)
Split the input string according to the given separator. Returning a vector of fragments.
Double_t StringToDouble(std::string in)
Gets a double from a string.
std::string ToDateTimeString(time_t time)
Format time_t into string.
std::string Replace(std::string in, std::string thisString, std::string byThisString, size_t fromPosition=0, Int_t N=0)
Replace any occurences of thisSring by byThisString inside string in.
std::string metadata
The associated metadata member used to register the relevant quantity.
Definition: TRestDataSet.h:38
std::string description
A user given description that can be used to define the relevant quantity.
Definition: TRestDataSet.h:44
std::string strategy
It determines how to produce the relevant quantity (accumulate/unique/last/max/min)
Definition: TRestDataSet.h:41
std::string value
The quantity value.
Definition: TRestDataSet.h:47