REST-for-Physics  v2.3
Rare Event Searches ToolKit for Physics
TRestDataSet.cxx
1/*************************************************************************
2 * This file is part of the REST software framework. *
3 * *
4 * Copyright (C) 2016 GIFNA/TREX (University of Zaragoza) *
5 * For more information see https://gifna.unizar.es/trex *
6 * *
7 * REST is free software: you can redistribute it and/or modify *
8 * it under the terms of the GNU General Public License as published by *
9 * the Free Software Foundation, either version 3 of the License, or *
10 * (at your option) any later version. *
11 * *
12 * REST is distributed in the hope that it will be useful, *
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
15 * GNU General Public License for more details. *
16 * *
17 * You should have a copy of the GNU General Public License along with *
18 * REST in $REST_PATH/LICENSE. *
19 * If not, see https://www.gnu.org/licenses/. *
20 * For the list of contributors see $REST_PATH/CREDITS. *
21 *************************************************************************/
22
293#include "TRestDataSet.h"
294
295#include "TRestRun.h"
296#include "TRestTools.h"
297
298ClassImp(TRestDataSet);
299
304
319TRestDataSet::TRestDataSet(const char* cfgFileName, const std::string& name) : TRestMetadata(cfgFileName) {
321}
322
327
332void TRestDataSet::Initialize() { SetSectionName(this->ClassName()); }
333
339 if (fTree != nullptr) {
340 RESTWarning << "Tree has already been loaded. Skipping TRestDataSet::GenerateDataSet ... "
341 << RESTendl;
342 return;
343 }
344
345 if (fFileSelection.empty()) FileSelection();
346
347 // We are not ready yet
348 if (fFileSelection.empty()) {
349 RESTError << "File selection is empty " << RESTendl;
350 return;
351 }
352
354 TRestRun run(fFileSelection.front());
355 std::vector<std::string> finalList;
356 finalList.push_back("runOrigin");
357 finalList.push_back("eventID");
358 finalList.push_back("timeStamp");
359
360 auto obsNames = run.GetAnalysisTree()->GetObservableNames();
361 for (const auto& obs : fObservablesList) {
362 if (std::find(obsNames.begin(), obsNames.end(), obs) != obsNames.end()) {
363 finalList.push_back(obs);
364 } else {
365 RESTWarning << " Observable " << obs << " not found in observable list, skipping..." << RESTendl;
366 }
367 }
368
369 for (const auto& name : obsNames) {
370 for (const auto& pcs : fProcessObservablesList) {
371 if (name.find(pcs) == 0) finalList.push_back(name);
372 }
373 }
374
375 // Remove duplicated observables if any
376 std::sort(finalList.begin(), finalList.end());
377 finalList.erase(std::unique(finalList.begin(), finalList.end()), finalList.end());
378
379 if (fMT)
380 ROOT::EnableImplicitMT();
381 else
382 ROOT::DisableImplicitMT();
383
384 RESTInfo << "Initializing dataset" << RESTendl;
385 fDataSet = ROOT::RDataFrame("AnalysisTree", fFileSelection);
386
387 RESTInfo << "Making cuts" << RESTendl;
389
390 // Adding new user columns added to the dataset
391 for (const auto& [cName, cExpression] : fColumnNameExpressions) {
392 RESTInfo << "Adding column to dataset: " << cName << RESTendl;
393 finalList.emplace_back(cName);
394 fDataSet = DefineColumn(cName, cExpression);
395 }
396
397 RESTInfo << "Generating snapshot." << RESTendl;
398 std::string user = getenv("USER");
399 std::string fOutName = "/tmp/rest_output_" + user + ".root";
400 fDataSet.Snapshot("AnalysisTree", fOutName, finalList);
401
402 RESTInfo << "Re-importing analysis tree." << RESTendl;
403 fDataSet = ROOT::RDataFrame("AnalysisTree", fOutName);
404
405 TFile* f = TFile::Open(fOutName.c_str());
406 fTree = (TChain*)f->Get("AnalysisTree");
407
408 RESTInfo << " - Dataset generated!" << RESTendl;
409}
410
414std::vector<std::string> TRestDataSet::FileSelection() {
415 fFileSelection.clear();
416
417 std::time_t time_stamp_start = REST_StringHelper::StringToTimeStamp(fFilterStartTime);
418 std::time_t time_stamp_end = REST_StringHelper::StringToTimeStamp(fFilterEndTime);
419
420 if (!time_stamp_end || !time_stamp_start) {
421 RESTError << "TRestDataSet::FileSelect. Start or end dates not properly formed. Please, check "
422 "REST_StringHelper::StringToTimeStamp documentation for valid formats"
423 << RESTendl;
424 return fFileSelection;
425 }
426
427 std::vector<std::string> fileNames = TRestTools::GetFilesMatchingPattern(fFilePattern);
428
429 RESTInfo << "TRestDataSet::FileSelection. Starting file selection." << RESTendl;
430 RESTInfo << "Total files : " << fileNames.size() << RESTendl;
431 RESTInfo << "This process may take long computation time in case there are many files." << RESTendl;
432
433 fTotalDuration = 0;
434 std::cout << "Processing file selection.";
435 int cnt = 1;
436 for (const auto& file : fileNames) {
437 if (cnt % 100 == 0) {
438 std::cout << std::endl;
439 std::cout << "Files processed: " << cnt << " ." << std::flush;
440 }
441 cnt++;
442 TRestRun run(file);
443 std::cout << "." << std::flush;
444 double runStart = run.GetStartTimestamp();
445 double runEnd = run.GetEndTimestamp();
446
447 if (runStart < time_stamp_start || runEnd > time_stamp_end) {
448 RESTInfo << "Rejecting file out of date range: " << file << RESTendl;
449 continue;
450 }
451
452 int n = 0;
453 bool accept = true;
454 for (const auto& md : fFilterMetadata) {
455 std::string mdValue = run.GetMetadataMember(md);
456
457 if (!fFilterContains[n].empty())
458 if (mdValue.find(fFilterContains[n]) == std::string::npos) accept = false;
459
460 if (fFilterGreaterThan[n] != -1) {
461 if (StringToDouble(mdValue) <= fFilterGreaterThan[n]) accept = false;
462 }
463
464 if (fFilterLowerThan[n] != -1)
465 if (StringToDouble(mdValue) >= fFilterLowerThan[n]) accept = false;
466
467 if (fFilterEqualsTo[n] != -1)
468 if (StringToDouble(mdValue) != fFilterEqualsTo[n]) accept = false;
469
470 n++;
471 }
472
473 if (!accept) continue;
474
475 Double_t acc = 0;
476 for (auto& [name, properties] : fQuantity) {
477 std::string value = run.ReplaceMetadataMembers(properties.metadata);
478 const Double_t val = REST_StringHelper::StringToDouble(value);
479
480 if (properties.strategy == "accumulate") {
481 acc += val;
482 properties.value = StringWithPrecision(val, 2);
483 }
484
485 if (properties.strategy == "max")
486 if (properties.value.empty() || REST_StringHelper::StringToDouble(properties.value) < val)
487 properties.value = value;
488
489 if (properties.strategy == "min")
490 if (properties.value.empty() || REST_StringHelper::StringToDouble(properties.value) > val)
491 properties.value = value;
492
493 if (properties.strategy == "unique") {
494 if (properties.value.empty())
495 properties.value = value;
496 else if (properties.value != value) {
497 RESTWarning << "TRestDataSet::FileSelection. Relevant quantity retrieval." << RESTendl;
498 RESTWarning << "A unique metadata member used for the `" << name
499 << "` quantity is not unique!!" << RESTendl;
500 RESTWarning << "Pre-registered value : " << properties.value << " New value : " << value
501 << RESTendl;
502 }
503 }
504
505 if (properties.strategy == "last") properties.value = value;
506 }
507
508 if (run.GetStartTimestamp() < fStartTime) fStartTime = run.GetStartTimestamp();
509
510 if (run.GetEndTimestamp() > fEndTime) fEndTime = run.GetEndTimestamp();
511
512 fTotalDuration += run.GetEndTimestamp() - run.GetStartTimestamp();
513 fFileSelection.push_back(file);
514 }
515 std::cout << std::endl;
516
517 return fFileSelection;
518}
519
526ROOT::RDF::RNode TRestDataSet::MakeCut(const TRestCut* cut) {
527 auto df = fDataSet;
528
529 if (cut == nullptr) return df;
530
531 auto paramCut = cut->GetParamCut();
532 auto obsList = df.GetColumnNames();
533 for (const auto& [param, condition] : paramCut) {
534 if (std::find(obsList.begin(), obsList.end(), param) != obsList.end()) {
535 std::string pCut = param + condition;
536 RESTDebug << "Applying cut " << pCut << RESTendl;
537 df = df.Filter(pCut);
538 } else {
539 RESTWarning << " Cut observable " << param << " not found in observable list, skipping..."
540 << RESTendl;
541 }
542 }
543
544 auto cutString = cut->GetCutStrings();
545 for (const auto& pCut : cutString) {
546 bool added = false;
547 for (const auto& obs : obsList) {
548 if (pCut.find(obs) != std::string::npos) {
549 RESTDebug << "Applying cut " << pCut << RESTendl;
550 df = df.Filter(pCut);
551 added = true;
552 break;
553 }
554 }
555
556 if (!added) {
557 RESTWarning << " Cut string " << pCut << " not found in observable list, skipping..." << RESTendl;
558 }
559 }
560
561 return df;
562}
563
576ROOT::RDF::RNode TRestDataSet::DefineColumn(const std::string& columnName, const std::string& formula) {
577 auto df = fDataSet;
578
579 std::string evalFormula = formula;
580 for (auto const& [name, properties] : fQuantity)
581 evalFormula = REST_StringHelper::Replace(evalFormula, name, properties.value);
582
583 df = df.Define(columnName, evalFormula);
584
585 return df;
586}
587
593
594 RESTMetadata << " - StartTime : " << REST_StringHelper::ToDateTimeString(fStartTime) << RESTendl;
595 RESTMetadata << " - EndTime : " << REST_StringHelper::ToDateTimeString(fEndTime) << RESTendl;
596 RESTMetadata << " - Path : " << TRestTools::SeparatePathAndName(fFilePattern).first << RESTendl;
597 RESTMetadata << " - File pattern : " << TRestTools::SeparatePathAndName(fFilePattern).second << RESTendl;
598 RESTMetadata << " " << RESTendl;
599 RESTMetadata << " - Accumulated run time (seconds) : " << fTotalDuration << RESTendl;
600 RESTMetadata << " - Accumulated run time (hours) : " << fTotalDuration / 3600. << RESTendl;
601 RESTMetadata << " - Accumulated run time (days) : " << fTotalDuration / 3600. / 24. << RESTendl;
602
603 RESTMetadata << " " << RESTendl;
604
605 if (!fObservablesList.empty()) {
606 RESTMetadata << " Single observables added:" << RESTendl;
607 RESTMetadata << " -------------------------" << RESTendl;
608 for (const auto& l : fObservablesList) RESTMetadata << " - " << l << RESTendl;
609
610 RESTMetadata << " " << RESTendl;
611 }
612
613 if (!fProcessObservablesList.empty()) {
614 RESTMetadata << " Process observables added: " << RESTendl;
615 RESTMetadata << " -------------------------- " << RESTendl;
616 for (const auto& l : fProcessObservablesList) RESTMetadata << " - " << l << RESTendl;
617
618 RESTMetadata << " " << RESTendl;
619 }
620
621 if (!fFilterMetadata.empty()) {
622 RESTMetadata << " Metadata filters: " << RESTendl;
623 RESTMetadata << " ----------------- " << RESTendl;
624 RESTMetadata << " - StartTime : " << fFilterStartTime << RESTendl;
625 RESTMetadata << " - EndTime : " << fFilterEndTime << RESTendl;
626 int n = 0;
627 for (const auto& mdFilter : fFilterMetadata) {
628 RESTMetadata << " - " << mdFilter << ".";
629
630 if (!fFilterContains[n].empty()) RESTMetadata << " Contains: " << fFilterContains[n];
631 if (fFilterGreaterThan[n] != -1) RESTMetadata << " Greater than: " << fFilterGreaterThan[n];
632 if (fFilterLowerThan[n] != -1) RESTMetadata << " Lower than: " << fFilterLowerThan[n];
633 if (fFilterEqualsTo[n] != -1) RESTMetadata << " Equals to: " << fFilterEqualsTo[n];
634
635 RESTMetadata << RESTendl;
636 n++;
637 }
638
639 RESTMetadata << " " << RESTendl;
640 }
641
642 if (!fQuantity.empty()) {
643 RESTMetadata << " Relevant quantities: " << RESTendl;
644 RESTMetadata << " -------------------- " << RESTendl;
645
646 for (auto const& [name, properties] : fQuantity) {
647 RESTMetadata << " - Name : " << name << ". Value : " << properties.value
648 << ". Strategy: " << properties.strategy << RESTendl;
649 RESTMetadata << " - Metadata: " << properties.metadata << RESTendl;
650 RESTMetadata << " - Description: " << properties.description << RESTendl;
651 RESTMetadata << " " << RESTendl;
652 }
653 }
654
655 if (!fColumnNameExpressions.empty()) {
656 RESTMetadata << " New columns added to generated dataframe: " << RESTendl;
657 RESTMetadata << " ---------------------------------------- " << RESTendl;
658 for (const auto& [cName, cExpression] : fColumnNameExpressions) {
659 RESTMetadata << " - Name : " << cName << RESTendl;
660 RESTMetadata << " - Expression: " << cExpression << RESTendl;
661 RESTMetadata << " " << RESTendl;
662 }
663 }
664
665 if (fMergedDataset) {
666 RESTMetadata << " " << RESTendl;
667 RESTMetadata << "This is a combined dataset." << RESTendl;
668 RESTMetadata << " -------------------- " << RESTendl;
669 RESTMetadata << " - Relevant quantities have been removed!" << RESTendl;
670 RESTMetadata << " - Dataset metadata properties correspond to the first file in the list."
671 << RESTendl;
672 RESTMetadata << " " << RESTendl;
673 RESTMetadata << "List of imported files: " << RESTendl;
674 RESTMetadata << " -------------------- " << RESTendl;
675 for (const auto& fn : fImportedFiles) RESTMetadata << " - " << fn << RESTendl;
676 }
677
678 RESTMetadata << " " << RESTendl;
679 if (fMT)
680 RESTMetadata << " - Multithreading was enabled" << RESTendl;
681 else
682 RESTMetadata << " - Multithreading was NOT enabled" << RESTendl;
683
684 RESTMetadata << "----" << RESTendl;
685}
686
692
694 TiXmlElement* filterDefinition = GetElement("filter");
695 while (filterDefinition != nullptr) {
696 std::string metadata = GetFieldValue("metadata", filterDefinition);
697 if (metadata.empty() || metadata == "Not defined") {
698 RESTError << "Filter key defined without metadata member!" << RESTendl;
699 exit(1);
700 }
701
702 fFilterMetadata.push_back(metadata);
703
704 std::string contains = GetFieldValue("contains", filterDefinition);
705 if (contains == "Not defined") contains = "";
706 Double_t greaterThan = StringToDouble(GetFieldValue("greaterThan", filterDefinition));
707 Double_t lowerThan = StringToDouble(GetFieldValue("lowerThan", filterDefinition));
708 Double_t equalsTo = StringToDouble(GetFieldValue("equalsTo", filterDefinition));
709
710 fFilterContains.push_back(contains);
711 fFilterGreaterThan.push_back(greaterThan);
712 fFilterLowerThan.push_back(lowerThan);
713 fFilterEqualsTo.push_back(equalsTo);
714
715 filterDefinition = GetNextElement(filterDefinition);
716 }
717
719 TiXmlElement* observablesDefinition = GetElement("observables");
720 while (observablesDefinition != nullptr) {
721 std::string observables = GetFieldValue("list", observablesDefinition);
722 if (observables.empty() || observables == "Not defined") {
723 RESTError << "<observables key does not contain a list!" << RESTendl;
724 exit(1);
725 }
726
727 std::vector<std::string> obsList = REST_StringHelper::Split(observables, ",");
728
729 fObservablesList.insert(fObservablesList.end(), obsList.begin(), obsList.end());
730
731 observablesDefinition = GetNextElement(observablesDefinition);
732 }
733
735 TiXmlElement* obsProcessDefinition = GetElement("processObservables");
736 while (obsProcessDefinition != nullptr) {
737 std::string observables = GetFieldValue("list", obsProcessDefinition);
738 if (observables.empty() || observables == "Not defined") {
739 RESTError << "<processObservables key does not contain a list!" << RESTendl;
740 exit(1);
741 }
742
743 std::vector<std::string> obsList = REST_StringHelper::Split(observables, ",");
744
745 for (const auto& l : obsList) fProcessObservablesList.push_back(l);
746
747 obsProcessDefinition = GetNextElement(obsProcessDefinition);
748 }
749
751 TiXmlElement* quantityDefinition = GetElement("quantity");
752 while (quantityDefinition != nullptr) {
753 std::string name = GetFieldValue("name", quantityDefinition);
754 if (name.empty() || name == "Not defined") {
755 RESTError << "<quantity key does not contain a name!" << RESTendl;
756 exit(1);
757 }
758
759 std::string metadata = GetFieldValue("metadata", quantityDefinition);
760 if (metadata.empty() || metadata == "Not defined") {
761 RESTError << "<quantity key does not contain a metadata value!" << RESTendl;
762 exit(1);
763 }
764
765 std::string strategy = GetFieldValue("strategy", quantityDefinition);
766 if (strategy.empty() || strategy == "Not defined") {
767 strategy = "unique";
768 }
769
770 std::string description = GetFieldValue("description", quantityDefinition);
771
772 RelevantQuantity quantity;
773 quantity.metadata = metadata;
774 quantity.strategy = strategy;
775 quantity.description = description;
776 quantity.value = "";
777
778 fQuantity[name] = quantity;
779
780 quantityDefinition = GetNextElement(quantityDefinition);
781 }
782
784 TiXmlElement* columnDefinition = GetElement("addColumn");
785 while (columnDefinition != nullptr) {
786 std::string name = GetFieldValue("name", columnDefinition);
787 if (name.empty() || name == "Not defined") {
788 RESTError << "<define key does not contain a name name!" << RESTendl;
789 exit(1);
790 }
791
792 std::string expression = GetFieldValue("expression", columnDefinition);
793 if (expression.empty() || expression == "Not defined") {
794 RESTError << "<addColumn key does not contain a expression value!" << RESTendl;
795 exit(1);
796 }
797
798 fColumnNameExpressions.push_back({name, expression});
799
800 columnDefinition = GetNextElement(columnDefinition);
801 }
802
803 fCut = (TRestCut*)InstantiateChildMetadata("TRestCut");
804}
805
819void TRestDataSet::Export(const std::string& filename, std::vector<std::string> excludeColumns) {
820 RESTInfo << "Exporting dataset" << RESTendl;
821
822 std::vector<std::string> columns = fDataSet.GetColumnNames();
823 if (!excludeColumns.empty()) {
824 columns.erase(std::remove_if(columns.begin(), columns.end(),
825 [&excludeColumns](std::string elem) {
826 return std::find(excludeColumns.begin(), excludeColumns.end(),
827 elem) != excludeColumns.end();
828 }),
829 columns.end());
830
831 RESTInfo << "Re-Generating snapshot." << RESTendl;
832 std::string user = getenv("USER");
833 std::string fOutName = "/tmp/rest_output_" + user + ".root";
834 fDataSet.Snapshot("AnalysisTree", fOutName, columns);
835
836 RESTInfo << "Re-importing analysis tree." << RESTendl;
837 fDataSet = ROOT::RDataFrame("AnalysisTree", fOutName);
838
839 TFile* f = TFile::Open(fOutName.c_str());
840 fTree = (TChain*)f->Get("AnalysisTree");
841 }
842
843 if (TRestTools::GetFileNameExtension(filename) == "txt" ||
844 TRestTools::GetFileNameExtension(filename) == "csv") {
845 if (excludeColumns.empty()) {
846 RESTInfo << "Re-Generating snapshot." << RESTendl;
847 std::string user = getenv("USER");
848 std::string fOutName = "/tmp/rest_output_" + user + ".root";
849 fDataSet.Snapshot("AnalysisTree", fOutName);
850
851 TFile* f = TFile::Open(fOutName.c_str());
852 fTree = (TChain*)f->Get("AnalysisTree");
853 }
854
855 std::vector<std::string> dataTypes;
856 for (int n = 0; n < fTree->GetListOfBranches()->GetEntries(); n++) {
857 std::string bName = fTree->GetListOfBranches()->At(n)->GetName();
858 std::string type = fTree->GetLeaf((TString)bName)->GetTypeName();
859 dataTypes.push_back(type);
860 if (type != "Double_t" && type != "Int_t") {
861 RESTError << "Branch name : " << bName << " is type : " << type << RESTendl;
862 RESTError << "Only Int_t and Double_t types are allowed for "
863 "exporting to ASCII table"
864 << RESTendl;
865 RESTError << "File will not be generated" << RESTendl;
866 return;
867 }
868 }
869
870 FILE* f = fopen(filename.c_str(), "wt");
872 fprintf(f, "### TRestDataSet generated file\n");
873 fprintf(f, "### \n");
874 fprintf(f, "### StartTime : %s\n", fFilterStartTime.c_str());
875 fprintf(f, "### EndTime : %s\n", fFilterEndTime.c_str());
876 fprintf(f, "###\n");
877 fprintf(f, "### Accumulated run time (seconds) : %lf\n", fTotalDuration);
878 fprintf(f, "### Accumulated run time (hours) : %lf\n", fTotalDuration / 3600.);
879 fprintf(f, "### Accumulated run time (days) : %lf\n", fTotalDuration / 3600. / 24.);
880 fprintf(f, "###\n");
881 fprintf(f, "### Data path : %s\n", TRestTools::SeparatePathAndName(fFilePattern).first.c_str());
882 fprintf(f, "### File pattern : %s\n", TRestTools::SeparatePathAndName(fFilePattern).second.c_str());
883 fprintf(f, "###\n");
884 if (!fFilterMetadata.empty()) {
885 fprintf(f, "### Metadata filters : \n");
886 int n = 0;
887 for (const auto& md : fFilterMetadata) {
888 fprintf(f, "### - %s.", md.c_str());
889 if (!fFilterContains[n].empty()) fprintf(f, " Contains: %s.", fFilterContains[n].c_str());
890 if (fFilterGreaterThan[n] != -1) fprintf(f, " Greater than: %6.3lf.", fFilterGreaterThan[n]);
891 if (fFilterLowerThan[n] != -1) fprintf(f, " Lower than: %6.3lf.", fFilterLowerThan[n]);
892 if (fFilterEqualsTo[n] != -1) fprintf(f, " Equals to: %6.3lf.", fFilterLowerThan[n]);
893 fprintf(f, "\n");
894 n++;
895 }
896 }
897 fprintf(f, "###\n");
898 fprintf(f, "### Relevant quantities: \n");
899 for (auto& [name, properties] : fQuantity) {
900 fprintf(f, "### - %s : %s - %s\n", name.c_str(), properties.value.c_str(),
901 properties.description.c_str());
902 }
903 fprintf(f, "###\n");
904 fprintf(f, "### Observables list: ");
905 for (int n = 0; n < fTree->GetListOfBranches()->GetEntries(); n++) {
906 std::string bName = fTree->GetListOfBranches()->At(n)->GetName();
907 fprintf(f, " %s", bName.c_str());
908 }
909 fprintf(f, "\n");
910 fprintf(f, "###\n");
911 fprintf(f, "### Data starts here\n");
912
913 auto obsNames = fDataSet.GetColumnNames();
914 std::string obsListStr = "";
915 for (const auto& l : obsNames) {
916 if (!obsListStr.empty()) obsListStr += ":";
917 obsListStr += l;
918 }
919
920 // We do this so that later we can recover the values using TTree::GetVal
921 fTree->Draw((TString)obsListStr, "", "goff");
922
923 for (unsigned int n = 0; n < fTree->GetEntries(); n++) {
924 for (unsigned int m = 0; m < GetNumberOfBranches(); m++) {
925 std::string bName = fTree->GetListOfBranches()->At(m)->GetName();
926 if (m > 0) fprintf(f, "\t");
927 if (dataTypes[m] == "Double_t")
928 if (bName == "timeStamp")
929 fprintf(f, "%010.0lf", fTree->GetVal(m)[n]);
930 else
931 fprintf(f, "%05.3e", fTree->GetVal(m)[n]);
932 else
933 fprintf(f, "%06d", (Int_t)fTree->GetVal(m)[n]);
934 }
935 fprintf(f, "\n");
936 }
937 fclose(f);
938
939 return;
940 } else if (TRestTools::GetFileNameExtension(filename) == "root") {
941 fDataSet.Snapshot("AnalysisTree", filename);
942
943 TFile* f = TFile::Open(filename.c_str(), "UPDATE");
944 std::string name = this->GetName();
945 if (name.empty()) name = "mock";
946 this->Write(name.c_str());
947 f->Close();
948 } else {
949 RESTWarning << "TRestDataSet::Export. Extension " << TRestTools::GetFileNameExtension(filename)
950 << " not recognized" << RESTendl;
951 }
952 RESTInfo << "Dataset generated: " << filename << RESTendl;
953}
954
959 SetName(dS.GetName());
960 fFilterStartTime = dS.GetFilterStartTime();
961 fFilterEndTime = dS.GetFilterEndTime();
962 fStartTime = dS.GetStartTime();
963 fEndTime = dS.GetEndTime();
964 fFilePattern = dS.GetFilePattern();
965 fObservablesList = dS.GetObservablesList();
967 fProcessObservablesList = dS.GetProcessObservablesList();
968 fFilterMetadata = dS.GetFilterMetadata();
969 fFilterContains = dS.GetFilterContains();
970 fFilterGreaterThan = dS.GetFilterGreaterThan();
971 fFilterLowerThan = dS.GetFilterLowerThan();
972 fFilterEqualsTo = dS.GetFilterEqualsTo();
973 fQuantity = dS.GetQuantity();
974 fColumnNameExpressions = dS.GetAddedColumns();
976 fCut = dS.GetCut();
977
978 return *this;
979}
980
986 auto obsNames = GetObservablesList();
987 for (const auto& obs : fObservablesList) {
988 if (std::find(obsNames.begin(), obsNames.end(), obs) != obsNames.end()) {
989 RESTError << "Cannot merge dataSets with different observable list " << RESTendl;
990 return false;
991 }
992 }
993
994 if (fStartTime > dS.GetStartTime()) fStartTime = dS.GetStartTime();
995 if (fEndTime < dS.GetEndTime()) fEndTime = dS.GetEndTime();
996
997 auto fileSelection = dS.GetFileSelection();
998 fFileSelection.insert(fFileSelection.end(), fileSelection.begin(), fileSelection.end());
999
1001
1002 return true;
1003}
1004
1010void TRestDataSet::Import(const std::string& fileName) {
1011 if (TRestTools::GetFileNameExtension(fileName) != "root") {
1012 RESTError << "Datasets can only be imported from root files" << RESTendl;
1013 return;
1014 }
1015
1016 TRestDataSet* dS = nullptr;
1017 TFile* file = TFile::Open(fileName.c_str(), "READ");
1018 if (file != nullptr) {
1019 TIter nextkey(file->GetListOfKeys());
1020 TKey* key;
1021 while ((key = (TKey*)nextkey())) {
1022 std::string kName = key->GetClassName();
1023 if (REST_Reflection::GetClassQuick(kName.c_str()) != nullptr &&
1024 REST_Reflection::GetClassQuick(kName.c_str())->InheritsFrom("TRestDataSet")) {
1025 dS = file->Get<TRestDataSet>(key->GetName());
1026 *this = *dS;
1027 }
1028 }
1029 }
1030
1031 if (dS == nullptr) {
1032 RESTError << fileName << " is not a valid dataSet" << RESTendl;
1033 return;
1034 }
1035
1036 if (fMT)
1037 ROOT::EnableImplicitMT();
1038 else
1039 ROOT::DisableImplicitMT();
1040
1041 fDataSet = ROOT::RDataFrame("AnalysisTree", fileName);
1042
1043 fTree = (TChain*)file->Get("AnalysisTree");
1044}
1045
1055void TRestDataSet::Import(std::vector<std::string> fileNames) {
1056 for (const auto& fN : fileNames)
1057 if (TRestTools::GetFileNameExtension(fN) != "root") {
1058 RESTError << "Datasets can only be imported from root files" << RESTendl;
1059 return;
1060 }
1061
1062 int count = 0;
1063 auto it = fileNames.begin();
1064 while (it != fileNames.end()) {
1065 std::string fileName = *it;
1066 TFile* file = TFile::Open(fileName.c_str(), "READ");
1067 bool isValid = false;
1068 if (file != nullptr) {
1069 TIter nextkey(file->GetListOfKeys());
1070 TKey* key;
1071 while ((key = (TKey*)nextkey())) {
1072 std::string kName = key->GetClassName();
1073 if (REST_Reflection::GetClassQuick(kName.c_str()) != nullptr &&
1074 REST_Reflection::GetClassQuick(kName.c_str())->InheritsFrom("TRestDataSet")) {
1075 TRestDataSet* dS = file->Get<TRestDataSet>(key->GetName());
1077 dS->PrintMetadata();
1078
1079 if (count == 0) {
1080 *this = *dS;
1081 isValid = true;
1082 } else {
1083 isValid = Merge(*dS);
1084 }
1085
1086 if (isValid) count++;
1087 }
1088 }
1089 } else {
1090 RESTError << "Cannot open " << fileName << RESTendl;
1091 }
1092
1093 if (!isValid) {
1094 RESTError << fileName << " is not a valid dataSet skipping..." << RESTendl;
1095 it = fileNames.erase(it);
1096 } else {
1097 ++it;
1098 }
1099 }
1100
1101 if (fileNames.empty()) {
1102 RESTError << "File selection is empty, dataSet will not be imported " << RESTendl;
1103 return;
1104 }
1105
1106 RESTInfo << "Opening list of files. First file: " << fileNames[0] << RESTendl;
1107 fDataSet = ROOT::RDataFrame("AnalysisTree", fileNames);
1108
1109 if (fTree != nullptr) {
1110 delete fTree;
1111 fTree = nullptr;
1112 }
1113 fTree = new TChain("AnalysisTree");
1114
1115 for (const auto& fN : fileNames) fTree->Add((TString)fN);
1116
1117 fMergedDataset = true;
1118 fImportedFiles = fileNames;
1119
1120 fQuantity.clear();
1121}
std::vector< std::string > GetObservableNames()
It returns a vector with strings containing all the observables that exist in the analysis tree.
A class to help on cuts definitions. To be used with TRestAnalysisTree.
Definition: TRestCut.h:31
It allows to group a number of runs that satisfy given metadata conditions.
Definition: TRestDataSet.h:34
std::vector< std::string > fFilterContains
If not empty it will check if the metadata member contains the string.
Definition: TRestDataSet.h:70
virtual std::vector< std::string > FileSelection()
Function to determine the filenames that satisfy the dataset conditions.
std::vector< Double_t > fFilterLowerThan
If the corresponding element is not empty it will check if the metadata member is lower.
Definition: TRestDataSet.h:76
void PrintMetadata() override
Prints on screen the information about the metadata members of TRestDataSet.
TChain * fTree
A pointer to the generated tree.
Definition: TRestDataSet.h:118
std::vector< std::string > fProcessObservablesList
It contains a list of the process where all observables should be added.
Definition: TRestDataSet.h:64
void Import(const std::string &fileName)
This function imports metadata from a root file it import metadata info from the previous dataSet whi...
std::map< std::string, RelevantQuantity > fQuantity
The properties of a relevant quantity that we want to store together with the dataset.
Definition: TRestDataSet.h:82
ROOT::RDF::RNode fDataSet
The resulting RDF::RNode object after initialization.
Definition: TRestDataSet.h:115
std::vector< std::pair< std::string, std::string > > fColumnNameExpressions
A list of new columns together with its corresponding expressions added to the dataset.
Definition: TRestDataSet.h:106
ROOT::RDF::RNode DefineColumn(const std::string &columnName, const std::string &formula)
This function will add a new column to the RDataFrame using the same scheme as the usual RDF::Define ...
Double_t fEndTime
TimeStamp for the end time of the last file.
Definition: TRestDataSet.h:97
size_t GetNumberOfBranches()
Number of variables (or observables)
Definition: TRestDataSet.h:158
TRestDataSet()
Default constructor.
Double_t GetTotalTimeInSeconds() const
It returns the accumulated run time in seconds.
Definition: TRestDataSet.h:164
ROOT::RDF::RNode MakeCut(const TRestCut *cut)
This function applies a TRestCut to the dataframe and returns a dataframe with the applied cuts....
void GenerateDataSet()
This function generates the data frame with the filelist and column names (or observables) that have ...
Bool_t fMT
A flag to enable Multithreading during dataframe generation.
Definition: TRestDataSet.h:109
TRestCut * fCut
Parameter cuts over the selected dataset.
Definition: TRestDataSet.h:85
void Export(const std::string &filename, std::vector< std::string > excludeColumns={})
It will generate an output file with the dataset compilation. Only the selected branches and the file...
std::string fFilterStartTime
All the selected runs will have a starting date after fStartTime.
Definition: TRestDataSet.h:52
Bool_t Merge(const TRestDataSet &dS)
This function merge different TRestDataSet metadata in current dataSet.
std::vector< std::string > fFilterMetadata
A list of metadata members where filters will be applied.
Definition: TRestDataSet.h:67
std::vector< std::string > fFileSelection
A list populated by the FileSelection method using the conditions of the dataset.
Definition: TRestDataSet.h:91
std::vector< std::string > GetFileSelection()
It returns a list of the files that have been finally selected.
Definition: TRestDataSet.h:161
std::string fFilterEndTime
All the selected runs will have an ending date before fEndTime.
Definition: TRestDataSet.h:55
Double_t fStartTime
TimeStamp for the start time of the first file.
Definition: TRestDataSet.h:94
std::vector< std::string > fObservablesList
It contains a list of the observables that will be added to the final tree or exported file.
Definition: TRestDataSet.h:61
Bool_t fMergedDataset
It keeps track if the generated dataset is a pure dataset or a merged one.
Definition: TRestDataSet.h:100
void Initialize() override
This function initialize different parameters from the TRestDataSet.
std::vector< std::string > fImportedFiles
The list of dataset files imported.
Definition: TRestDataSet.h:103
Double_t fTotalDuration
The total integrated run time of selected files.
Definition: TRestDataSet.h:88
std::string fFilePattern
A glob file pattern that must be satisfied by all files.
Definition: TRestDataSet.h:58
std::vector< Double_t > fFilterGreaterThan
If the corresponding element is not empty it will check if the metadata member is greater.
Definition: TRestDataSet.h:73
std::vector< Double_t > fFilterEqualsTo
If the corresponding element is not empty it will check if the metadata member is equal.
Definition: TRestDataSet.h:79
void InitFromConfigFile() override
Initialization of specific TRestDataSet members through an RML file.
TRestDataSet & operator=(TRestDataSet &dS)
Operator to copy TRestDataSet metadata.
~TRestDataSet()
Default destructor.
A base class for any REST metadata class.
Definition: TRestMetadata.h:70
virtual void PrintMetadata()
Implemented it in the derived metadata class to print out specific metadata information.
endl_t RESTendl
Termination flag object for TRestStringOutput.
TiXmlElement * GetElement(std::string eleDeclare, TiXmlElement *e=nullptr)
Get an xml element from a given parent element, according to its declaration.
Int_t LoadConfigFromFile(const std::string &configFilename, const std::string &sectionName="")
Give the file name, find out the corresponding section. Then call the main starter.
TRestMetadata * InstantiateChildMetadata(int index, std::string pattern="")
This method will retrieve a new TRestMetadata instance of a child element of the present TRestMetadat...
virtual void InitFromConfigFile()
To make settings from rml file. This method must be implemented in the derived class.
TRestStringOutput::REST_Verbose_Level GetVerboseLevel()
returns the verboselevel in type of REST_Verbose_Level enumerator
std::string GetFieldValue(std::string parName, TiXmlElement *e)
Returns the field value of an xml element which has the specified name.
void SetSectionName(std::string sName)
set the section name, clear the section content
std::string fConfigFileName
Full name of the rml file.
virtual Int_t Write(const char *name=nullptr, Int_t option=0, Int_t bufsize=0)
overwriting the write() method with fStore considered
TiXmlElement * GetNextElement(TiXmlElement *e)
Get the next sibling xml element of this element, with same eleDeclare.
Data provider and manager in REST.
Definition: TRestRun.h:18
std::string ReplaceMetadataMembers(const std::string &instr, Int_t precision=8)
It will replace the data members contained inside the string given as input. The data members in the ...
Definition: TRestRun.cxx:1652
@ REST_Info
+show most of the information for each steps
static std::pair< std::string, std::string > SeparatePathAndName(const std::string &fullname)
Separate path and filename in a full path+filename string, returns a pair of string.
Definition: TRestTools.cxx:813
static std::string GetFileNameExtension(const std::string &fullname)
Gets the file extension as the substring found after the latest ".".
Definition: TRestTools.cxx:823
static std::vector< std::string > GetFilesMatchingPattern(std::string pattern, bool unlimited=false)
Returns a list of files whose name match the pattern string. Key word is "*". e.g....
Definition: TRestTools.cxx:976
TClass * GetClassQuick(std::string type)
time_t StringToTimeStamp(std::string time)
A method to convert a date/time formatted string to a timestamp.
std::vector< std::string > Split(std::string in, std::string separator, bool allowBlankString=false, bool removeWhiteSpaces=false, int startPos=-1)
Split the input string according to the given separator. Returning a vector of fragments.
Double_t StringToDouble(std::string in)
Gets a double from a string.
std::string ToDateTimeString(time_t time)
Format time_t into string.
std::string Replace(std::string in, std::string thisString, std::string byThisString, size_t fromPosition=0, Int_t N=0)
Replace any occurences of thisSring by byThisString inside string in.
std::string metadata
The associated metadata member used to register the relevant quantity.
Definition: TRestDataSet.h:38
std::string description
A user given description that can be used to define the relevant quantity.
Definition: TRestDataSet.h:44
std::string strategy
It determines how to produce the relevant quantity (accumulate/unique/last/max/min)
Definition: TRestDataSet.h:41
std::string value
The quantity value.
Definition: TRestDataSet.h:47