// @(#)root/tmva $Id$
// Author: Andreas Hoecker, Peter Speckmayer, Joerg Stelzer, Eckhard von Toerne, Helge Voss

/**********************************************************************************
 * Project: TMVA - a Root-integrated toolkit for multivariate data analysis       *
 * Package: TMVA                                                                  *
 * Class  : DataSetFactory                                                        *
 * Web    : http://tmva.sourceforge.net                                           *
 *                                                                                *
 * Description:                                                                   *
 *      Contains all the data information                                         *
 *                                                                                *
 * Authors (alphabetical):                                                        *
 *      Andreas Hoecker <Andreas.Hocker@cern.ch> - CERN, Switzerland              *
 *      Joerg Stelzer   <Joerg.Stelzer@cern.ch>  - CERN, Switzerland              *
 *      Peter Speckmayer <Peter.Speckmayer@cern.ch> - CERN, Switzerland           *
 *      Eckhard von Toerne <evt@physik.uni-bonn.de> - U. of Bonn, Germany         *
 *      Helge Voss      <Helge.Voss@cern.ch>     - MPI-K Heidelberg, Germany      *
 *                                                                                *
 * Copyright (c) 2006:                                                            *
 *      CERN, Switzerland                                                         *
 *      MPI-K Heidelberg, Germany                                                 *
 *                                                                                *
 * Redistribution and use in source and binary forms, with or without             *
 * modification, are permitted according to the terms listed in LICENSE           *
 * (http://tmva.sourceforge.net/LICENSE)                                          *
 **********************************************************************************/

#ifndef ROOT_TMVA_DataSetFactory
#define ROOT_TMVA_DataSetFactory

//////////////////////////////////////////////////////////////////////////
//                                                                      //
// DataSetFactory                                                       //
//                                                                      //
// Class that contains all the data information                         //
//                                                                      //
//////////////////////////////////////////////////////////////////////////

#include <stdlib.h>

#include <functional>
#include <map>
#include <vector>

#ifndef ROOT_TString
#include "TString.h"
#endif
#ifndef ROOT_TTree
#include "TTree.h"
#endif
#ifndef ROOT_TCut
#include "TCut.h"
#endif
#ifndef ROOT_TTreeFormula
#include "TTreeFormula.h"
#endif
#ifndef ROOT_TMatrixDfwd
#include "TMatrixDfwd.h"
#endif
#ifndef ROOT_TPrincipal
#include "TPrincipal.h"
#endif
#ifndef ROOT_TRandom3
#include "TRandom3.h"
#endif

#ifndef ROOT_TMVA_Types
#include "TMVA/Types.h"
#endif
#ifndef ROOT_TMVA_VariableInfo
#include "TMVA/VariableInfo.h"
#endif
#ifndef ROOT_TMVA_Event
#include "TMVA/Event.h"
#endif

namespace TMVA {

   class DataSet;
   class DataSetInfo;
   class DataInputHandler;
   class TreeInfo;
   class MsgLogger;

   // =============== maybe move these elsewhere (e.g. into the tools)

   // =============== functors =======================


   // Callable wrapper around a TRandom3 engine.
   // The engine is seeded once at construction; every call with an argument n
   // forwards to TRandom3::Integer(n) and returns its result.
   class RandomGenerator {
   public:
      // Seed the wrapped engine directly through its constructor.
      RandomGenerator( UInt_t seed ) : fRandom( seed ) {}

      // Draw one value from the engine for the upper bound n.
      UInt_t operator() ( UInt_t n ) {
         const UInt_t drawn = fRandom.Integer( n );
         return drawn;
      }

   private:
      TRandom3 fRandom; // random generator
   };


   // Deleting functor, meant for algorithms such as std::for_each applied to a
   // range of pointers: each invocation deletes the pointer it is given and
   // returns a reference to the functor itself.
   template<class T>
      struct DeleteFunctor_t
      {
         DeleteFunctor_t& operator()(const T* doomed)
         {
            delete doomed;   // deleting a null pointer is a harmless no-op
            return *this;
         }
      };

   // Convenience factory: DeleteFunctor<T>() yields a DeleteFunctor_t<const T>,
   // i.e. a deleter whose call operator accepts a const T*.
   template<class T>
      DeleteFunctor_t<const T> DeleteFunctor()
      {
         DeleteFunctor_t<const T> deleter;
         return deleter;
      }


   // Generator functor producing a running sequence: the first call returns
   // the start value given to the constructor, and each call post-increments
   // the stored value.
   template< typename T >
      class Increment {
      public:
         Increment( T start ) : fNext( start ) {}

         // Return the current value, then advance it by one.
         T operator()() {
            return fNext++;
         }

      private:
         T fNext;   // value handed out by the next call
      };



   // Identity functor: calling it returns a copy of its argument.
   // It exposes argument_type, which the compose_* adapters in this header
   // read from their inner functors.
   template <typename F>
      class null_t
      {
      public:
         typedef F argument_type;

         // returns argF
         F operator()(const F& argF) const { return argF; }
      };

   // Factory for the identity functor: null<F>() returns a null_t<F>.
   template <typename F>
      inline null_t<F> null() {
         null_t<F> identity;
         return identity;
      }



   // Binary composition adapter: for functors f, g and h the call
   // operator evaluates f( g(argG), h(argH) ).
   //
   // Requirements on the template arguments:
   //   G and H provide a nested argument_type,
   //   F provides a nested result_type,
   //   all three are copy-constructible and callable through a const object.
   //
   // The nested typedefs first_argument_type, second_argument_type and
   // result_type are declared directly instead of being inherited from
   // std::binary_function, which is deprecated since C++11 and removed in
   // C++17.
   //
   // The functors are stored by value. Holding them by reference would leave
   // the adapter with dangling references whenever it is built from
   // temporaries and kept beyond the end of the creating expression.
   template <typename F, typename G, typename H>
      class compose_binary_t
      {
      public:
         typedef typename G::argument_type first_argument_type;
         typedef typename H::argument_type second_argument_type;
         typedef typename F::result_type   result_type;

         // Copies the three functors into the adapter.
         compose_binary_t(const F& _f, const G& _g, const H& _h) : f(_f), g(_g), h(_h)
         {
         }

         // Returns f( g(argG), h(argH) ).
         result_type operator()(const first_argument_type& argG,
                                const second_argument_type& argH) const
         {
            return f(g(argG),h(argH));
         }

      private:
         F f;    // outer functor: f(g(argG),h(argH))
         G g;    // applied to the first argument
         H h;    // applied to the second argument
      };

   // Factory deducing the functor types: compose_binary(f, g, h) returns a
   // compose_binary_t<F,G,H> constructed from the three arguments.
   template <typename F, typename G, typename H>
      inline compose_binary_t<F,G,H> compose_binary(const F& _f, const G& _g, const H& _h) {
         compose_binary_t<F,G,H> composed(_f, _g, _h);
         return composed;
      }




   // Unary composition adapter: for functors f and g the call operator
   // evaluates f( g(argG) ).
   //
   // Requirements on the template arguments:
   //   G provides a nested argument_type,
   //   F provides a nested result_type,
   //   both are copy-constructible and callable through a const object.
   //
   // The nested typedefs argument_type and result_type are declared directly
   // instead of being inherited from std::unary_function, which is deprecated
   // since C++11 and removed in C++17.
   //
   // The functors are stored by value. Holding them by reference would leave
   // the adapter with dangling references whenever it is built from
   // temporaries and kept beyond the end of the creating expression.
   template <typename F, typename G>
      class compose_unary_t
      {
      public:
         typedef typename G::argument_type argument_type;
         typedef typename F::result_type   result_type;

         // Copies both functors into the adapter.
         compose_unary_t(const F& _f, const G& _g) : f(_f), g(_g)
         {
         }

         // Returns f( g(argG) ).
         result_type operator()(const argument_type& argG) const
         {
            return f(g(argG));
         }

      private:
         F f;    // outer functor: f(g(argG))
         G g;    // inner functor, applied first
      };

   // Factory deducing the functor types: compose_unary(f, g) returns a
   // compose_unary_t<F,G> constructed from the two arguments.
   template <typename F, typename G>
      inline compose_unary_t<F,G> compose_unary(const F& _f, const G& _g) {
         compose_unary_t<F,G> composed(_f, _g);
         return composed;
      }

   // =============== end of functors ================


   // =========================================================


   // Factory producing DataSet objects. Its only public operation besides
   // construction/destruction is CreateDataSet(), which takes a DataSetInfo
   // and a DataInputHandler and returns a DataSet pointer.
   // Apart from the small inline accessors, all member functions are only
   // declared here; their definitions are not part of this header.
   // NOTE(review): the trailing comments on data members (e.g. "//!" and
   // "//->") look like ROOT dictionary annotations for this ClassDef'd class;
   // keep them in place when editing -- confirm against the ROOT I/O docs.
   class DataSetFactory:public TObject {

      // Containers of Event pointers; the typedef names indicate a grouping
      // per class and per Types::ETreeType.
      // NOTE(review): this header does not show who owns the Event objects
      // -- confirm in the implementation.
      typedef std::vector<Event* >                             EventVector;
      typedef std::vector< EventVector >                        EventVectorOfClasses;
      typedef std::map<Types::ETreeType, EventVectorOfClasses > EventVectorOfClassesOfTreeType;
      typedef std::map<Types::ETreeType, EventVector >          EventVectorOfTreeType;

      // One Double_t per entry, plus the same keyed by Types::ETreeType.
      typedef std::vector< Double_t >                    ValuePerClass;
      typedef std::map<Types::ETreeType, ValuePerClass > ValuePerClassOfTreeType;

      // Bundle of event counters. Every field is zero-initialised by the
      // constructor; the code that fills them is not in this header, so the
      // field meanings are only what their names suggest.
      //
      // NOTE(review): varAvLength is released with delete[] in the destructor,
      // but no copy constructor or copy assignment is declared. Copying an
      // EventStats whose varAvLength is non-null (it is stored by value in
      // std::vector<EventStats>, see EvtStatsPerClass below) would make two
      // objects delete[] the same array -- confirm no such copy happens.
      class EventStats {
      public:
         Int_t    nTrainingEventsRequested;
         Int_t    nTestingEventsRequested;
         Float_t  TrainTestSplitRequested;
         Int_t    nInitialEvents;
         Int_t    nEvBeforeCut;
         Int_t    nEvAfterCut;
         Float_t  nWeEvBeforeCut;
         Float_t  nWeEvAfterCut;
         Double_t nNegWeights;
         Float_t* varAvLength;//->
      // all counters start at zero and varAvLength starts as a null pointer
      EventStats():
         nTrainingEventsRequested(0),
            nTestingEventsRequested(0),
            TrainTestSplitRequested(0),
            nInitialEvents(0),
            nEvBeforeCut(0),
            nEvAfterCut(0),
            nWeEvBeforeCut(0),
            nWeEvAfterCut(0),
            nNegWeights(0),
            varAvLength(0)
               {}
         // frees the array behind varAvLength (delete[] on a null pointer is a no-op)
         ~EventStats() { delete[] varAvLength; }
         // ratio nEvAfterCut / nEvBeforeCut computed in Float_t;
         // there is no guard for nEvBeforeCut == 0 (floating-point division by zero)
         Float_t cutScaling() const { return Float_t(nEvAfterCut)/nEvBeforeCut; }
      };

      typedef std::vector< int >                            NumberPerClass;
      typedef std::vector< EventStats >                     EvtStatsPerClass;

   public:

      ~DataSetFactory();

      DataSetFactory();

      // public entry point: returns a DataSet pointer for the given
      // DataSetInfo and DataInputHandler
      // NOTE(review): ownership of the returned pointer is not visible here -- confirm.
      DataSet* CreateDataSet( DataSetInfo &, DataInputHandler& );
   protected:
     

      DataSet*  BuildInitialDataSet( DataSetInfo&, TMVA::DataInputHandler& );
      DataSet*  BuildDynamicDataSet( DataSetInfo& );

      // ---------- new versions
      // The following four functions share the per-tree-type event map
      // (eventsmap) and the per-class statistics (eventCounts) as in/out
      // reference parameters.
      void      BuildEventVector ( DataSetInfo& dsi,
                                   DataInputHandler& dataInput,
                                   EventVectorOfClassesOfTreeType& eventsmap,
                                   EvtStatsPerClass& eventCounts);

      DataSet*  MixEvents        ( DataSetInfo& dsi,
                                   EventVectorOfClassesOfTreeType& eventsmap,
                                   EvtStatsPerClass& eventCounts,
                                   const TString& splitMode,
                                   const TString& mixMode,
                                   const TString& normMode,
                                   UInt_t splitSeed);

      void      RenormEvents     ( DataSetInfo& dsi,
                                   EventVectorOfClassesOfTreeType& eventsmap,
                                   const EvtStatsPerClass& eventCounts,
                                   const TString& normMode );

      // normMode, splitSeed, splitMode and mixMode are passed by non-const
      // reference, i.e. they can be written by this function; note that the
      // EvtStatsPerClass parameter is named "eventsmap" here
      void      InitOptions      ( DataSetInfo& dsi,
                                   EvtStatsPerClass& eventsmap,
                                   TString& normMode, UInt_t& splitSeed,
                                   TString& splitMode, TString& mixMode);


      // ------------------------

      // auxiliary functions to compute correlations
      // NOTE(review): ownership of the returned TMatrixD* is not visible here -- confirm.
      TMatrixD* CalcCorrelationMatrix( DataSet*, const UInt_t classNumber );
      TMatrixD* CalcCovarianceMatrix ( DataSet*, const UInt_t classNumber );
      void      CalcMinMax           ( DataSet*, DataSetInfo& dsi );

      // resets branch addresses to current event
      void   ResetBranchAndEventAddresses( TTree* );
      // forgets the current tree by setting fCurrentTree to a null pointer
      void   ResetCurrentTree() { fCurrentTree = 0; }
      void   ChangeToNewTree( TreeInfo&, const DataSetInfo & );
      // hasDollar is an output parameter (non-const reference)
      Bool_t CheckTTreeFormula( TTreeFormula* ttf, const TString& expression, Bool_t& hasDollar );

      // verbosity
      // returns the fVerbose flag
      Bool_t Verbose() { return fVerbose; }

      // data members

      // verbosity
      Bool_t                     fVerbose;           // Verbosity
      TString                    fVerboseLevel;      // VerboseLevel

      Bool_t                     fScaleWithPreselEff; // how to deal with requested #events in connection with preselection cuts 

      // the event
      TTree*                     fCurrentTree;       // the tree, events are currently read from
      UInt_t                     fCurrentEvtIdx;     // the current event (to avoid reading of the same event)

      // the formulas for reading the original tree
      std::vector<TTreeFormula*> fInputFormulas;   // input variables
      std::vector<TTreeFormula*> fTargetFormulas;  // targets
      std::vector<TTreeFormula*> fCutFormulas;     // cuts
      std::vector<TTreeFormula*> fWeightFormula;   // weights
      std::vector<TTreeFormula*> fSpectatorFormulas; // spectators

      MsgLogger*                 fLogger;          //! message logger
      // dereferences fLogger without a null check, so fLogger must point to a
      // valid MsgLogger before Log() is used
      MsgLogger& Log() const { return *fLogger; }
   public:
       
       ClassDef(DataSetFactory,1);
   };
}

#endif
 DataSetFactory.h:1
 DataSetFactory.h:2
 DataSetFactory.h:3
 DataSetFactory.h:4
 DataSetFactory.h:5
 DataSetFactory.h:6
 DataSetFactory.h:7
 DataSetFactory.h:8
 DataSetFactory.h:9
 DataSetFactory.h:10
 DataSetFactory.h:11
 DataSetFactory.h:12
 DataSetFactory.h:13
 DataSetFactory.h:14
 DataSetFactory.h:15
 DataSetFactory.h:16
 DataSetFactory.h:17
 DataSetFactory.h:18
 DataSetFactory.h:19
 DataSetFactory.h:20
 DataSetFactory.h:21
 DataSetFactory.h:22
 DataSetFactory.h:23
 DataSetFactory.h:24
 DataSetFactory.h:25
 DataSetFactory.h:26
 DataSetFactory.h:27
 DataSetFactory.h:28
 DataSetFactory.h:29
 DataSetFactory.h:30
 DataSetFactory.h:31
 DataSetFactory.h:32
 DataSetFactory.h:33
 DataSetFactory.h:34
 DataSetFactory.h:35
 DataSetFactory.h:36
 DataSetFactory.h:37
 DataSetFactory.h:38
 DataSetFactory.h:39
 DataSetFactory.h:40
 DataSetFactory.h:41
 DataSetFactory.h:42
 DataSetFactory.h:43
 DataSetFactory.h:44
 DataSetFactory.h:45
 DataSetFactory.h:46
 DataSetFactory.h:47
 DataSetFactory.h:48
 DataSetFactory.h:49
 DataSetFactory.h:50
 DataSetFactory.h:51
 DataSetFactory.h:52
 DataSetFactory.h:53
 DataSetFactory.h:54
 DataSetFactory.h:55
 DataSetFactory.h:56
 DataSetFactory.h:57
 DataSetFactory.h:58
 DataSetFactory.h:59
 DataSetFactory.h:60
 DataSetFactory.h:61
 DataSetFactory.h:62
 DataSetFactory.h:63
 DataSetFactory.h:64
 DataSetFactory.h:65
 DataSetFactory.h:66
 DataSetFactory.h:67
 DataSetFactory.h:68
 DataSetFactory.h:69
 DataSetFactory.h:70
 DataSetFactory.h:71
 DataSetFactory.h:72
 DataSetFactory.h:73
 DataSetFactory.h:74
 DataSetFactory.h:75
 DataSetFactory.h:76
 DataSetFactory.h:77
 DataSetFactory.h:78
 DataSetFactory.h:79
 DataSetFactory.h:80
 DataSetFactory.h:81
 DataSetFactory.h:82
 DataSetFactory.h:83
 DataSetFactory.h:84
 DataSetFactory.h:85
 DataSetFactory.h:86
 DataSetFactory.h:87
 DataSetFactory.h:88
 DataSetFactory.h:89
 DataSetFactory.h:90
 DataSetFactory.h:91
 DataSetFactory.h:92
 DataSetFactory.h:93
 DataSetFactory.h:94
 DataSetFactory.h:95
 DataSetFactory.h:96
 DataSetFactory.h:97
 DataSetFactory.h:98
 DataSetFactory.h:99
 DataSetFactory.h:100
 DataSetFactory.h:101
 DataSetFactory.h:102
 DataSetFactory.h:103
 DataSetFactory.h:104
 DataSetFactory.h:105
 DataSetFactory.h:106
 DataSetFactory.h:107
 DataSetFactory.h:108
 DataSetFactory.h:109
 DataSetFactory.h:110
 DataSetFactory.h:111
 DataSetFactory.h:112
 DataSetFactory.h:113
 DataSetFactory.h:114
 DataSetFactory.h:115
 DataSetFactory.h:116
 DataSetFactory.h:117
 DataSetFactory.h:118
 DataSetFactory.h:119
 DataSetFactory.h:120
 DataSetFactory.h:121
 DataSetFactory.h:122
 DataSetFactory.h:123
 DataSetFactory.h:124
 DataSetFactory.h:125
 DataSetFactory.h:126
 DataSetFactory.h:127
 DataSetFactory.h:128
 DataSetFactory.h:129
 DataSetFactory.h:130
 DataSetFactory.h:131
 DataSetFactory.h:132
 DataSetFactory.h:133
 DataSetFactory.h:134
 DataSetFactory.h:135
 DataSetFactory.h:136
 DataSetFactory.h:137
 DataSetFactory.h:138
 DataSetFactory.h:139
 DataSetFactory.h:140
 DataSetFactory.h:141
 DataSetFactory.h:142
 DataSetFactory.h:143
 DataSetFactory.h:144
 DataSetFactory.h:145
 DataSetFactory.h:146
 DataSetFactory.h:147
 DataSetFactory.h:148
 DataSetFactory.h:149
 DataSetFactory.h:150
 DataSetFactory.h:151
 DataSetFactory.h:152
 DataSetFactory.h:153
 DataSetFactory.h:154
 DataSetFactory.h:155
 DataSetFactory.h:156
 DataSetFactory.h:157
 DataSetFactory.h:158
 DataSetFactory.h:159
 DataSetFactory.h:160
 DataSetFactory.h:161
 DataSetFactory.h:162
 DataSetFactory.h:163
 DataSetFactory.h:164
 DataSetFactory.h:165
 DataSetFactory.h:166
 DataSetFactory.h:167
 DataSetFactory.h:168
 DataSetFactory.h:169
 DataSetFactory.h:170
 DataSetFactory.h:171
 DataSetFactory.h:172
 DataSetFactory.h:173
 DataSetFactory.h:174
 DataSetFactory.h:175
 DataSetFactory.h:176
 DataSetFactory.h:177
 DataSetFactory.h:178
 DataSetFactory.h:179
 DataSetFactory.h:180
 DataSetFactory.h:181
 DataSetFactory.h:182
 DataSetFactory.h:183
 DataSetFactory.h:184
 DataSetFactory.h:185
 DataSetFactory.h:186
 DataSetFactory.h:187
 DataSetFactory.h:188
 DataSetFactory.h:189
 DataSetFactory.h:190
 DataSetFactory.h:191
 DataSetFactory.h:192
 DataSetFactory.h:193
 DataSetFactory.h:194
 DataSetFactory.h:195
 DataSetFactory.h:196
 DataSetFactory.h:197
 DataSetFactory.h:198
 DataSetFactory.h:199
 DataSetFactory.h:200
 DataSetFactory.h:201
 DataSetFactory.h:202
 DataSetFactory.h:203
 DataSetFactory.h:204
 DataSetFactory.h:205
 DataSetFactory.h:206
 DataSetFactory.h:207
 DataSetFactory.h:208
 DataSetFactory.h:209
 DataSetFactory.h:210
 DataSetFactory.h:211
 DataSetFactory.h:212
 DataSetFactory.h:213
 DataSetFactory.h:214
 DataSetFactory.h:215
 DataSetFactory.h:216
 DataSetFactory.h:217
 DataSetFactory.h:218
 DataSetFactory.h:219
 DataSetFactory.h:220
 DataSetFactory.h:221
 DataSetFactory.h:222
 DataSetFactory.h:223
 DataSetFactory.h:224
 DataSetFactory.h:225
 DataSetFactory.h:226
 DataSetFactory.h:227
 DataSetFactory.h:228
 DataSetFactory.h:229
 DataSetFactory.h:230
 DataSetFactory.h:231
 DataSetFactory.h:232
 DataSetFactory.h:233
 DataSetFactory.h:234
 DataSetFactory.h:235
 DataSetFactory.h:236
 DataSetFactory.h:237
 DataSetFactory.h:238
 DataSetFactory.h:239
 DataSetFactory.h:240
 DataSetFactory.h:241
 DataSetFactory.h:242
 DataSetFactory.h:243
 DataSetFactory.h:244
 DataSetFactory.h:245
 DataSetFactory.h:246
 DataSetFactory.h:247
 DataSetFactory.h:248
 DataSetFactory.h:249
 DataSetFactory.h:250
 DataSetFactory.h:251
 DataSetFactory.h:252
 DataSetFactory.h:253
 DataSetFactory.h:254
 DataSetFactory.h:255
 DataSetFactory.h:256
 DataSetFactory.h:257
 DataSetFactory.h:258
 DataSetFactory.h:259
 DataSetFactory.h:260
 DataSetFactory.h:261
 DataSetFactory.h:262
 DataSetFactory.h:263
 DataSetFactory.h:264
 DataSetFactory.h:265
 DataSetFactory.h:266
 DataSetFactory.h:267
 DataSetFactory.h:268
 DataSetFactory.h:269
 DataSetFactory.h:270
 DataSetFactory.h:271
 DataSetFactory.h:272
 DataSetFactory.h:273
 DataSetFactory.h:274
 DataSetFactory.h:275
 DataSetFactory.h:276
 DataSetFactory.h:277
 DataSetFactory.h:278
 DataSetFactory.h:279
 DataSetFactory.h:280
 DataSetFactory.h:281
 DataSetFactory.h:282
 DataSetFactory.h:283
 DataSetFactory.h:284
 DataSetFactory.h:285
 DataSetFactory.h:286
 DataSetFactory.h:287
 DataSetFactory.h:288
 DataSetFactory.h:289
 DataSetFactory.h:290
 DataSetFactory.h:291
 DataSetFactory.h:292
 DataSetFactory.h:293
 DataSetFactory.h:294
 DataSetFactory.h:295
 DataSetFactory.h:296
 DataSetFactory.h:297
 DataSetFactory.h:298
 DataSetFactory.h:299
 DataSetFactory.h:300
 DataSetFactory.h:301
 DataSetFactory.h:302
 DataSetFactory.h:303
 DataSetFactory.h:304
 DataSetFactory.h:305
 DataSetFactory.h:306
 DataSetFactory.h:307
 DataSetFactory.h:308
 DataSetFactory.h:309
 DataSetFactory.h:310
 DataSetFactory.h:311
 DataSetFactory.h:312
 DataSetFactory.h:313
 DataSetFactory.h:314
 DataSetFactory.h:315
 DataSetFactory.h:316
 DataSetFactory.h:317
 DataSetFactory.h:318
 DataSetFactory.h:319
 DataSetFactory.h:320
 DataSetFactory.h:321
 DataSetFactory.h:322
 DataSetFactory.h:323
 DataSetFactory.h:324
 DataSetFactory.h:325
 DataSetFactory.h:326
 DataSetFactory.h:327
 DataSetFactory.h:328
 DataSetFactory.h:329
 DataSetFactory.h:330