azmfaridee / mothur

This is GSoC2012 fork of 'Mothur'. We are trying to implement a number of 'Feature Selection' algorithms for microbial ecology data and incorporate them into mother's main codebase.
https://github.com/mothur/mothur
GNU General Public License v3.0
3 stars 1 forks source link

Take a Deeper Review on the Paper "Feature Selection via Regularized Trees" #13

Closed azmfaridee closed 11 years ago

azmfaridee commented 12 years ago

Parent Issue #11

Outline the algorithm described in the the paper Feature Selection via Regularized Trees. Also examine the R packages the authors described and find out how useful it can be for us.

azmfaridee commented 12 years ago

I have been trying to find out the differences between the Random Forest and Regularized Random Forest R package source codes. Both of them has src folder with the following structure:

src/
├── classTree.c
├── regTree.c
├── regrf.c
├── rf.c
├── rf.h
├── rfsub.f
└── rfutils.c

Running FileMerge between them shows off the basic difference between the algos (RRF just adding some new parameters)

diff -u RandomForestResouces/randomForest/src/classTree.c RegulalizedRandormForestResouces/RRF/src/classTree.c
--- RandomForestResouces/randomForest/src/classTree.c   2012-01-06 20:15:31.000000000 +0600
+++ RegulalizedRandormForestResouces/RRF/src/classTree.c    2012-04-09 06:22:33.000000000 +0600
@@ -22,7 +22,8 @@
                double *bestsplitnext, double *tgini, int *nodeStatus,
                int *nodePop, int *nodeStart, double *tclassPop, int maxNodes,
                int nodeSize, int *ncase, int *inBag, int mTry, int *varUsed,
-               int *nodeClass, int *treeSize, double *win) {
+               int *nodeClass, int *treeSize, double *win,
+              int *varUsedAll, double *coefReg, int *flagReg) {
 /* Buildtree consists of repeated calls to two subroutines, Findbestsplit
    and Movedata.  Findbestsplit does just that--it finds the best split of
    the current node.  Movedata moves the data in the split node right and
@@ -78,13 +79,15 @@
         F77_CALL(findbestsplit)(a, b, cl, mdim, nsample, nclass, cat,
                                 ndstart, ndend, tclassPop, tclasscat,
                                 &msplit, &decsplit, &nbest, ncase, &jstat,
-                                inBag, mTry, win, wr, wc, wl, mred, i, mind);
+                                inBag, mTry, win, wr, wc, wl, mred, i, mind, varUsedAll,coefReg,flagReg);
         if (jstat == 1) {
             nodeStatus[i] = NODE_TERMINAL;
             continue;
         } else {
+           //continue;//new debug
             bestvar[i] = msplit;
             varUsed[msplit - 1] = 1;
+           //varUsedAll[msplit - 1] = 1;
             tgini[msplit - 1] += decsplit;
             if (cat[msplit-1] == 1) {
                 bestsplit[i] = a[msplit - 1  + nbest * mdim];
@@ -144,20 +147,15 @@
     for (k = 0; k < ndbigtree; ++k) {
         if (nodeStatus[k] == NODE_TERMINAL) {
             pp = 0;
-            ntie = 1;
             for (j = 0; j < nclass; ++j) {
                 if (classPop[j + k * nclass] > pp) {
                     nodeClass[k] = j;
                     pp = classPop[j + k * nclass];
-                    ntie = 1;
                 }
                 /* Break ties at random: */
-                if (classPop[j + k * nclass] == pp) {
-                   if (unif_rand() < 1.0 / ntie) {
-                       nodeClass[k] = j;
-                       pp = classPop[j + k * nclass];
-                   }
-                   ntie++;
+                if (classPop[j + k * nclass] == pp && unif_rand() > 0.5) {
+                    nodeClass[k] = j;
+                    pp = classPop[j + k * nclass];
                 }
             }
         }
@@ -242,16 +240,15 @@
                             *bestSplit = j;
                             critmax = crit;
                             *splitVar = mvar;
-                            ntie = 1;
                         }
                         /* Break ties at random: */
                         if (crit == critmax) {
-                           if (unif_rand() < 1.0 / ntie) {
-                               *bestSplit = j;
-                               critmax = crit;
-                               *splitVar = mvar;
-                           }
-                           ntie++;
+               ntie++;
+               if (unif_rand() > 1.0 / ntie) {
+               *bestSplit = j;
+               critmax = crit;
+               *splitVar = mvar;
+               }
                         }
                     }
                 }
@@ -295,7 +292,7 @@
 void F77_NAME(catmax)(double *parentDen, double *tclasscat,
                       double *tclasspop, int *nclass, int *lcat,
                       unsigned int *ncatsp, double *critmax, int *nhit,
-                      int *maxcat, int *ncmax, int *ncsplit) {
+                      int *maxcat, int *ncmax, int *ncsplit,double *coefReg, int *flagReg) {
 /* This finds the best split of a categorical variable with lcat
    categories and nclass classes, where tclasscat(j, k) is the number
    of cases in class j with category value k. The method uses an
@@ -342,6 +339,8 @@
             rightNum += leftCatClassCount[j] * leftCatClassCount[j];
         }
         decGini = (leftNum / leftDen) + (rightNum / (*parentDen - leftDen));
+//If regularization and not used, then regularize new       
+       if((*flagReg)==1)decGini = (*coefReg) * decGini;
         if (decGini > *critmax) {
             *critmax = decGini;
             *nhit = 1;
@@ -356,7 +355,7 @@
 /* Find best split of with categorical variable when there are two classes */
 void F77_NAME(catmaxb)(double *totalWt, double *tclasscat, double *classCount,
                        int *nclass, int *nCat, unsigned int *nbest, double *critmax,
-                       int *nhit, double *catCount) {
+                       int *nhit, double *catCount, double *coefReg, int *flagReg) {

     double catProportion[32], cp[32], cm[32];
     int kcat[32];
@@ -391,6 +390,10 @@
             /* If neither node is empty, check the split. */
             if (rightDen > 1.0e-5 && leftDen > 1.0e-5) {
                 crit = (leftNum / leftDen) + (rightNum / rightDen);
+               
+               //If regularization and not used, then regularize new        
+               if(*flagReg==1)crit = (*coefReg) * crit;
+               
                 if (crit > *critmax) {
                     *critmax = crit;
                     bestsplit = .5 * (catProportion[i] + catProportion[i + 1]);
diff -u RandomForestResouces/randomForest/src/rf.c RegulalizedRandormForestResouces/RRF/src/rf.c
--- RandomForestResouces/randomForest/src/rf.c  2012-01-06 20:15:31.000000000 +0600
+++ RegulalizedRandormForestResouces/RRF/src/rf.c   2012-04-09 06:22:33.000000000 +0600
@@ -1,6 +1,6 @@
 /*****************************************************************
 Copyright (C) 2001-9 Leo Breiman, Adele Cutler and Merck & Co., Inc.
-
+ 
 This program is free software; you can redistribute it and/or
 modify it under the terms of the GNU General Public License
 as published by the Free Software Foundation; either version 2
@@ -19,6 +19,8 @@
 Re-written from the original main program in Fortran.
 Andy Liaw Feb. 7, 2002.
 Modifications to get the forest out Matt Wiener Feb. 26, 2002.
+
+Extented to regularized random forest by Houtao Deng Oct. 27, 2011 
 *****************************************************************/

 #include <R.h>
@@ -44,7 +46,7 @@
         int *nodeclass, double *xbestsplit, double *errtr,
         int *testdat, double *xts, int *clts, int *nts, double *countts,
         int *outclts, int *labelts, double *proxts, double *errts,
-             int *inbag) {
+             int *inbag, double *coefReg, int *flagReg0) {
     /******************************************************************
      *  C wrapper for random forests:  get input from R and drive
      *  the Fortran routines.
@@ -75,6 +77,8 @@
      *  nodesize: minimum node size: no node with fewer than ndsize
      *            cases will be split
      *
+    *  coefReg: coefficient for the regularized gain
+    *  flagReg: 1=regularization; 0: without regularization
      *  Output:
      *
      *  outcl:    class predicted by RF
@@ -90,13 +94,16 @@
     int jb, j, n, m, k, idxByNnode, idxByNsample, imp, localImp, iprox,
    oobprox, keepf, replace, stratify, trace, *nright,
    *nrightimp, *nout, *nclts, Ntree;
-
+   //double coefReg;//new
+   int flagReg;//new
     int *out, *bestsplitnext, *bestsplit, *nodepop, *jin, *nodex,
    *nodexts, *nodestart, *ta, *ncase, *jerr, *varUsed,
    *jtr, *classFreq, *idmove, *jvr,
    *at, *a, *b, *mind, *nind, *jts, *oobpair;
-    int **strata_idx, *strata_size, last, ktmp, nEmpty, ntry;
-
+    int **strata_idx, *strata_size, last, ktmp, anyEmpty, ntry;
+   
+   int *varUsedAll; // the variable used in all previous splits
+   
     double av=0.0;

     double *tgini, *tx, *wl, *classpop, *tclasscat, *tclasspop, *win,
@@ -119,6 +126,14 @@
     Ntree    = *ntree;
     mtry     = *nvar;
     ntest    = *nts;
+   flagReg = *flagReg0;
+   //coefReg[0]=0.068;
+
+   //zeroInt(varDebug, mdim);
+   //varUsedAll[1]=10;//debug new
+   //varDebug=100;
+   //printf("%d\n",varDebug);
+   
     nsample = addClass ? (nsample0 + nsample0) : nsample0;
     mimp = imp ? mdim : 1;
     nimp = imp ? nsample : 1;
@@ -147,6 +162,7 @@
     ncase =         (int *) S_alloc(nsample, sizeof(int));
     jerr =          (int *) S_alloc(nsample, sizeof(int));
     varUsed =       (int *) S_alloc(mdim, sizeof(int));
+   varUsedAll =       (int *) S_alloc(mdim, sizeof(int));
     jtr =           (int *) S_alloc(nsample, sizeof(int));
     jvr =           (int *) S_alloc(nsample, sizeof(int));
     classFreq =     (int *) S_alloc(nclass, sizeof(int));
@@ -162,7 +178,9 @@
     if (oobprox) {
    oobpair = (int *) S_alloc(near*near, sizeof(int));
     }
-
+   
+   zeroInt(varUsedAll, mdim);
+   
     /* Count number of cases in each class. */
     zeroInt(classFreq, nclass);
     for (n = 0; n < nsample; ++n) classFreq[cl[n] - 1] ++;
@@ -192,7 +210,8 @@
     } else {
    nind = replace ? NULL : (int *) S_alloc(nsample, sizeof(int));
     }
-
+   
+   
     /*    INITIALIZE FOR RUN */
     if (*testdat) zeroDouble(countts, ntest * nclass);
     zeroInt(counttr, nclass * nsample);
@@ -233,16 +252,22 @@
     }
     idxByNnode = 0;
     idxByNsample = 0;
+   
+
+   //zeroInt(varUsedAll, mdim); //initialize the regularization vector new
+
+   //varDebug[19] = flagReg;
+   
     for (jb = 0; jb < Ntree; jb++) {
         /* Do we need to simulate data for the second class? */
-        if (addClass) createClass(x, nsample0, nsample, mdim);
+        if (addClass) createClass(x, nsample0, nsample, mdim);     
        do {
            zeroInt(nodestatus + idxByNnode, *nrnodes);
            zeroInt(treemap + 2*idxByNnode, 2 * *nrnodes);
            zeroDouble(xbestsplit + idxByNnode, *nrnodes);
-           zeroInt(nodeclass + idxByNnode, *nrnodes);
+           zeroInt(nodeclass + idxByNnode, *nrnodes);          
             zeroInt(varUsed, mdim);
-            /* TODO: Put all sampling code into a function. */
+           /* TODO: Put all sampling code into a function. */
             /* drawSample(sampsize, nsample, ); */
            if (stratify) {  /* stratified sampling */
                zeroInt(jin, nsample);
@@ -280,9 +305,9 @@
                    }
                }
            } else {  /* unstratified sampling */
+               anyEmpty = 0;
                ntry = 0;
                do {
-                   nEmpty = 0;
                    zeroInt(jin, nsample);
                    zeroDouble(tclasspop, nclass);
                    zeroDouble(win, nsample);
@@ -308,12 +333,10 @@
                    }
                    /* check if any class is missing in the sample */
                    for (n = 0; n < nclass; ++n) {
-                       if (tclasspop[n] == 0.0) nEmpty++;
+                       if (tclasspop[n] == 0) anyEmpty = 1;
                    }
                    ntry++;
-               } while (nclass - nEmpty < 2 && ntry <= 30);
-               /* If there are still fewer than two classes in the data, throw an error. */
-               if (nclass - nEmpty < 2) error("Still have fewer than two classes in the in-bag sample after 30 attempts.");
+               } while (anyEmpty && ntry <= 10);
            }

             /* If need to keep indices of inbag data, do that here. */
@@ -322,11 +345,17 @@
                     inbag[n + idxByNsample] = jin[n];
                 }
             }
+           

+           //int debug;for (n = 0; n < 10000000000; n++) debug =1;
+           
            /* Copy the original a matrix back. */
            memcpy(a, at, sizeof(int) * mdim * nsample);
            modA(a, &nuse, nsample, mdim, cat, *maxcat, ncase, jin);
-
+           //varUsedAll[1]=1000; //debug new
+           //varDebug[1]=1000;
+           //varDebug[20] = flagReg;
+           //varDebug[21] = &flagReg;
            F77_CALL(buildtree)(a, b, cl, cat, maxcat, &mdim, &nsample,
                                &nclass,
                                treemap + 2*idxByNnode, bestvar + idxByNnode,
@@ -334,16 +363,18 @@
                                nodestatus + idxByNnode, nodepop,
                                nodestart, classpop, tclasspop, tclasscat,
                                ta, nrnodes, idmove, &ndsize, ncase,
-                               &mtry, varUsed, nodeclass + idxByNnode,
+                               &mtry, varUsed,nodeclass + idxByNnode,
                                ndbigtree + jb, win, wr, wl, &mdim,
-                               &nuse, mind);
+                               &nuse, mind,varUsedAll,coefReg, &flagReg);
            /* if the "tree" has only the root node, start over */
        } while (ndbigtree[jb] == 1);

+       //int debug;for (n = 0; n < 10000000000; n++) debug =1;
+       
        Xtranslate(x, mdim, *nrnodes, nsample, bestvar + idxByNnode,
                   bestsplit, bestsplitnext, xbestsplit + idxByNnode,
                   nodestatus + idxByNnode, cat, ndbigtree[jb]);
-
+           
        /*  Get test set error */
        if (*testdat) {
             predictClassTree(xts, ntest, mdim, treemap + 2*idxByNnode,
@@ -548,7 +579,8 @@
                  double *pid, double *cutoff, double *countts, int *treemap,
                  int *nodestatus, int *cat, int *nodeclass, int *jts,
                  int *jet, int *bestvar, int *node, int *treeSize,
-                 int *keepPred, int *prox, double *proxMat, int *nodes) {
+                 int *keepPred, int *prox, double *proxMat, int *nodes,
+                double *coefReg, int *flagReg) {
     int j, n, n1, n2, idxNodes, offset1, offset2, *junk, ntie;
     double crit, cmax;

@@ -557,6 +589,7 @@
     offset1 = 0;
     offset2 = 0;
     junk = NULL;
+   

     for (j = 0; j < *ntree; ++j) {
        /* predict by the j-th tree */
@@ -587,12 +620,11 @@
            if (crit > cmax) {
                jet[n] = j + 1;
                cmax = crit;
-               ntie = 1;
            }
            /* Break ties at random: */
            if (crit == cmax) {
-               if (unif_rand() < 1.0 / ntie) jet[n] = j + 1;
                ntie++;
+               if (unif_rand() > 1.0 / ntie) jet[n] = j + 1;
            }
        }
     }
@@ -633,23 +665,22 @@
        smaxtr = 0.0;
        ntie = 1;
        for (j = 0; j < nclass; ++j) {
-           qq = (((double) counttr[j + n*nclass]) / out[n]) / cutoff[j];
-           if (j+1 != cl[n]) smax = (qq > smax) ? qq : smax;
-           /* if vote / cutoff is larger than current max, re-set max and
-              change predicted class to the current class */
-           if (qq > smaxtr) {
-               smaxtr = qq;
-               jest[n] = j+1;
-               ntie = 1;
-           }
-           /* break tie at random */
-           if (qq == smaxtr) {
-               if (unif_rand() < 1.0 / ntie) {
-                   smaxtr = qq;
-                   jest[n] = j+1;
-               }
-               ntie++;
-           }
+       qq = (((double) counttr[j + n*nclass]) / out[n]) / cutoff[j];
+       if (j+1 != cl[n]) smax = (qq > smax) ? qq : smax;
+       /* if vote / cutoff is larger than current max, re-set max and
+          change predicted class to the current class */
+       if (qq > smaxtr) {
+           smaxtr = qq;
+           jest[n] = j+1;
+       }
+       /* break tie at random */
+       if (qq == smaxtr) {
+           ntie++;
+           if (unif_rand() > 1.0 / ntie) {
+           smaxtr = qq;
+           jest[n] = j+1;
+           }
+       }
        }
        if (jest[n] != cl[n]) {
        errtr[cl[n]] += 1.0;
@@ -680,15 +711,14 @@
            if (crit > cmax) {
                jet[n] = j+1;
                cmax = crit;
-               ntie = 1;
            }
            /*  Break ties at random: */
            if (crit == cmax) {
-               if (unif_rand() < 1.0 / ntie) {
+               ntie++;
+               if (unif_rand() > 1.0 / ntie) {
                    jet[n] = j+1;
                    cmax = crit;
                }
-               ntie++;
            }
        }
     }
diff -u RandomForestResouces/randomForest/src/rf.h RegulalizedRandormForestResouces/RRF/src/rf.h
--- RandomForestResouces/randomForest/src/rf.h  2012-01-06 20:15:32.000000000 +0600
+++ RegulalizedRandormForestResouces/RRF/src/rf.h   2012-04-09 06:22:33.000000000 +0600
@@ -38,7 +38,7 @@
                  double *pid, double *cutoff, double *countts, int *treemap, 
                  int *nodestatus, int *cat, int *nodeclass, int *jts, 
                  int *jet, int *bestvar, int *nodexts, int *ndbigtree, 
-                 int *keepPred, int *prox, double *proxmatrix, int *nodes);
+                 int *keepPred, int *prox, double *proxmatrix, int *nodes, double *coefReg, int *flagReg);

 void regTree(double *x, double *y, int mdim, int nsample, 
         int *lDaughter, int *rDaughter, double *upper, double *avnode, 
@@ -92,7 +92,7 @@
                int *ta, int *nrnodes, int *, 
                int *, int *, int *, int *, int *, int *, 
                double *, double *, double *,
-               int *, int *, int *); 
+               int *, int *, int *,int *varUsedAll, double *coefReg, int *flagReg); 

 /* Node status */
 #define NODE_TERMINAL -1
diff -u RandomForestResouces/randomForest/src/rfsub.f RegulalizedRandormForestResouces/RRF/src/rfsub.f
--- RandomForestResouces/randomForest/src/rfsub.f   2012-01-06 20:15:32.000000000 +0600
+++ RegulalizedRandormForestResouces/RRF/src/rfsub.f    2012-04-09 06:22:33.000000000 +0600
@@ -21,7 +21,8 @@
      1     nclass, treemap, bestvar, bestsplit, bestsplitnext, tgini,
      1     nodestatus,nodepop, nodestart, classpop, tclasspop,
      1     tclasscat,ta,nrnodes, idmove, ndsize, ncase, mtry, iv,
-     1     nodeclass, ndbigtree, win, wr, wl, mred, nuse, mind)
+     1     nodeclass, ndbigtree, win, wr, wl, mred, nuse, mind, 
+     1     varUsedAll,coefReg, flagReg)

 c     Buildtree consists of repeated calls to two subroutines, Findbestsplit
 c     and Movedata.  Findbestsplit does just that--it finds the best split of
@@ -45,12 +46,12 @@
      1     nodepop(nrnodes), nodestart(nrnodes),
      1     bestsplitnext(nrnodes), idmove(nsample),
      1     ncase(nsample), b(mdim,nsample),
-     1     iv(mred), nodeclass(nrnodes), mind(mred)
-
+     1     iv(mred), nodeclass(nrnodes), mind(mred),varUsedAll(mred)
+    
       double precision tclasspop(nclass), classpop(nclass, nrnodes),
      1     tclasscat(nclass, 32), win(nsample), wr(nclass),
-     1     wl(nclass), tgini(mdim), xrand
-      integer msplit, ntie
+     1     wl(nclass), tgini(mdim), xrand, coefReg(mred)
+      integer msplit, ntie, flagReg

       msplit = 0
       call zerv(nodestatus,nrnodes)
@@ -58,6 +59,9 @@
       call zerv(nodepop,nrnodes)
       call zermr(classpop,nclass,nrnodes)

+
+     
+     
       do j=1,nclass
          classpop(j, 1) = tclasspop(j)
       end do
@@ -81,15 +85,24 @@

          call findbestsplit(a,b,cl,mdim,nsample,nclass,cat,maxcat,
      1        ndstart, ndend,tclasspop,tclasscat,msplit, decsplit,
-     1        nbest,ncase, jstat,mtry,win,wr,wl,mred,mind)
+     1        nbest,ncase, jstat,mtry,win,wr,wl,mred,mind,
+     1        varUsedAll,coefReg, flagReg)
 c         call intpr("jstat", 5, jstat, 1)
 c         call intpr("msplit", 6, msplit, 1)
 c     If the node is terminal, move on.  Otherwise, split.
+ 
          if (jstat .eq. -1) then
             nodestatus(kbuild) = -1
             goto 30
          else
             bestvar(kbuild) = msplit
+c         varDebug(11) = 1000          
+c         varDebug(12) = flagReg       
+c      mark the variable being splitted as 1; new      
+         if (flagReg .eq. 1) then  
+            varUsedAll(msplit) = 1 
+c            varDebug(10) = msplit             
+         endif         
             iv(msplit) = 1
             if (decsplit .lt. 0.0) decsplit = 0.0
             tgini(msplit) = tgini(msplit) + decsplit
@@ -158,7 +171,6 @@

 c     form prediction in terminal nodes
       do kn = 1, ndbigtree
-      
          if (nodestatus(kn) .eq. -1) then
             pp = 0
             ntie = 1
@@ -166,17 +178,16 @@
                if (classpop(j,kn) .gt. pp) then
                   nodeclass(kn) = j
                   pp = classpop(j,kn)
-                  ntie = 1
                end if
 c     Break ties at random:
                if (classpop(j,kn) .eq. pp) then
+                  ntie = ntie + 1
                   call rrand(xrand)
                   if (xrand .lt. 1.0 / ntie) then
                      nodeclass(kn)=j
                      pp=classpop(j,kn)
                   end if
-                  ntie = ntie + 1
-               end if                     
+               end if
             end do
          end if
 c         call intpr("node", 4, kn, 1)
@@ -197,13 +208,15 @@
       subroutine findbestsplit(a, b, cl, mdim, nsample, nclass, cat,
      1     maxcat, ndstart, ndend, tclasspop, tclasscat, msplit,
      2     decsplit, nbest, ncase, jstat, mtry, win, wr, wl,
-     3     mred, mind)
+     3     mred, mind,varUsedAll,coefReg, flagReg)
       implicit double precision(a-h,o-z)
       integer a(mdim,nsample), cl(nsample), cat(mdim),
-     1     ncase(nsample), b(mdim,nsample), nn, j
+     1     ncase(nsample), b(mdim,nsample), nn, j, tempFlagReg, debug
       double precision tclasspop(nclass), tclasscat(nclass,32), dn(32),
-     1     win(nsample), wr(nclass), wl(nclass), xrand
-      integer mind(mred), ncmax, ncsplit,nhit, ntie
+     1     win(nsample), wr(nclass), wl(nclass), xrand, coefReg(mred)
+      integer mind(mred), ncmax, ncsplit,nhit, ntie    
+      integer flagReg,mtryCounter , varUsedAll(mred)
+      double precision tempCoefReg 
       ncmax = 10
       ncsplit = 512
 c     compute initial values of numerator and denominator of Gini
@@ -215,6 +228,7 @@
       end do
       crit0 = pno / pdo
       jstat = 0
+  

 c     start main loop through variables to find best split
       critmax = -1.0e25
@@ -222,8 +236,10 @@
          mind(k) = k
       end do
       nn = mred
+      mtryCounter=0          
 c     sampling mtry variables w/o replacement.
-      do mt = 1, mtry
+c    do mt = 1, mtry  new use mred instead of mtry to include the variables new
+      do mt = 1, mred
          call rrand(xrand)
          j = int(nn * xrand) + 1
          mvar = mind(j)
@@ -231,6 +247,15 @@
          mind(nn) = mvar
          nn = nn - 1
          lcat = cat(mvar)
+c      if already use mtry random variables, then next; if not then plus1 new
+         if (varUsedAll(mvar).eq.0 .and. mtryCounter .ge. mtry) then 
+            CYCLE
+         end if
+c       when this variable not selected    yet or flagReg=0 so that the indicator variable is always 0  
+         if (varUsedAll(mvar).eq.0) then 
+            mtryCounter = mtryCounter + 1
+         end if
+        
          if (lcat .eq. 1) then
 c     Split on a numerical predictor.
             rrn = pno
@@ -256,22 +281,30 @@
 c     If neither nodes is empty, check the split.
                   if (dmin1(rrd, rld) .gt. 1.0e-5) then
                      crit = (rln / rld) + (rrn / rrd)
+c    If regularization and not used, then regularize new       
+c                      varDebug(3) = flagReg
+c                      varDebug(4) = varUsedAll(mvar)
+c                      varDebug(5) = mvar
+                    if(flagReg.eq.1 .and. varUsedAll(mvar).eq.0)then 
+c                      varDebug(5) = 9999
+                       crit = coefReg(mvar)*crit
+                       debug = 1
+                    end if
                      if (crit .gt. critmax) then
                         nbest = nsp
                         critmax = crit
                         msplit = mvar
-                        ntie = 1
                      end if
 c     Break ties at random:
                      if (crit .eq. critmax) then
+                        ntie = ntie + 1
                         call rrand(xrand)
                         if (xrand .lt. 1.0 / ntie) then
                            nbest = nsp
                            critmax = crit
                            msplit = mvar
                         end if
-                        ntie = ntie + 1
-                     end if                     
+                     end if
                   end if
                end if
             end do
@@ -292,14 +325,23 @@
                dn(i) = su
                if(su .gt. 0) nnz = nnz + 1
             end do
-            nhit = 0
+            nhit = 0           
+c          If regularization and not used, then regularize new 
+              tempFlagReg=0
+              tempCoefReg=0
+            if(flagReg.eq.1 .and. varUsedAll(mvar).eq.0) then 
+                 tempFlagReg = 1
+                 tempCoefReg = coefReg(mvar)
+            end if     
             if (nnz .gt. 1) then
                if (nclass .eq. 2 .and. lcat .gt. ncmax) then
                   call catmaxb(pdo, tclasscat, tclasspop, nclass,
-     &                 lcat, nbest, critmax, nhit, dn)
+     &                 lcat, nbest, critmax, nhit, dn,tempCoefReg, 
+     &                 tempFlagReg) 
                else
                   call catmax(pdo, tclasscat, tclasspop, nclass, lcat,
-     &                 nbest, critmax, nhit, maxcat, ncmax, ncsplit)
+     &                 nbest, critmax, nhit, maxcat, ncmax, ncsplit,
+     &                 tempCoefReg, tempFlagReg)    
                end if
                if (nhit .eq. 1) msplit = mvar
 c            else