Closed azmfaridee closed 11 years ago
I have been trying to find out the differences between the Random Forest and Regularized Random Forest R package source codes. Both of them has src
folder with the following structure:
src/
├── classTree.c
├── regTree.c
├── regrf.c
├── rf.c
├── rf.h
├── rfsub.f
└── rfutils.c
Running FileMerge between them shows off the basic difference between the algos (RRF just adding some new parameters)
diff -u RandomForestResouces/randomForest/src/classTree.c RegulalizedRandormForestResouces/RRF/src/classTree.c
--- RandomForestResouces/randomForest/src/classTree.c 2012-01-06 20:15:31.000000000 +0600
+++ RegulalizedRandormForestResouces/RRF/src/classTree.c 2012-04-09 06:22:33.000000000 +0600
@@ -22,7 +22,8 @@
double *bestsplitnext, double *tgini, int *nodeStatus,
int *nodePop, int *nodeStart, double *tclassPop, int maxNodes,
int nodeSize, int *ncase, int *inBag, int mTry, int *varUsed,
- int *nodeClass, int *treeSize, double *win) {
+ int *nodeClass, int *treeSize, double *win,
+ int *varUsedAll, double *coefReg, int *flagReg) {
/* Buildtree consists of repeated calls to two subroutines, Findbestsplit
and Movedata. Findbestsplit does just that--it finds the best split of
the current node. Movedata moves the data in the split node right and
@@ -78,13 +79,15 @@
F77_CALL(findbestsplit)(a, b, cl, mdim, nsample, nclass, cat,
ndstart, ndend, tclassPop, tclasscat,
&msplit, &decsplit, &nbest, ncase, &jstat,
- inBag, mTry, win, wr, wc, wl, mred, i, mind);
+ inBag, mTry, win, wr, wc, wl, mred, i, mind, varUsedAll,coefReg,flagReg);
if (jstat == 1) {
nodeStatus[i] = NODE_TERMINAL;
continue;
} else {
+ //continue;//new debug
bestvar[i] = msplit;
varUsed[msplit - 1] = 1;
+ //varUsedAll[msplit - 1] = 1;
tgini[msplit - 1] += decsplit;
if (cat[msplit-1] == 1) {
bestsplit[i] = a[msplit - 1 + nbest * mdim];
@@ -144,20 +147,15 @@
for (k = 0; k < ndbigtree; ++k) {
if (nodeStatus[k] == NODE_TERMINAL) {
pp = 0;
- ntie = 1;
for (j = 0; j < nclass; ++j) {
if (classPop[j + k * nclass] > pp) {
nodeClass[k] = j;
pp = classPop[j + k * nclass];
- ntie = 1;
}
/* Break ties at random: */
- if (classPop[j + k * nclass] == pp) {
- if (unif_rand() < 1.0 / ntie) {
- nodeClass[k] = j;
- pp = classPop[j + k * nclass];
- }
- ntie++;
+ if (classPop[j + k * nclass] == pp && unif_rand() > 0.5) {
+ nodeClass[k] = j;
+ pp = classPop[j + k * nclass];
}
}
}
@@ -242,16 +240,15 @@
*bestSplit = j;
critmax = crit;
*splitVar = mvar;
- ntie = 1;
}
/* Break ties at random: */
if (crit == critmax) {
- if (unif_rand() < 1.0 / ntie) {
- *bestSplit = j;
- critmax = crit;
- *splitVar = mvar;
- }
- ntie++;
+ ntie++;
+ if (unif_rand() > 1.0 / ntie) {
+ *bestSplit = j;
+ critmax = crit;
+ *splitVar = mvar;
+ }
}
}
}
@@ -295,7 +292,7 @@
void F77_NAME(catmax)(double *parentDen, double *tclasscat,
double *tclasspop, int *nclass, int *lcat,
unsigned int *ncatsp, double *critmax, int *nhit,
- int *maxcat, int *ncmax, int *ncsplit) {
+ int *maxcat, int *ncmax, int *ncsplit,double *coefReg, int *flagReg) {
/* This finds the best split of a categorical variable with lcat
categories and nclass classes, where tclasscat(j, k) is the number
of cases in class j with category value k. The method uses an
@@ -342,6 +339,8 @@
rightNum += leftCatClassCount[j] * leftCatClassCount[j];
}
decGini = (leftNum / leftDen) + (rightNum / (*parentDen - leftDen));
+//If regularization and not used, then regularize new
+ if((*flagReg)==1)decGini = (*coefReg) * decGini;
if (decGini > *critmax) {
*critmax = decGini;
*nhit = 1;
@@ -356,7 +355,7 @@
/* Find best split of with categorical variable when there are two classes */
void F77_NAME(catmaxb)(double *totalWt, double *tclasscat, double *classCount,
int *nclass, int *nCat, unsigned int *nbest, double *critmax,
- int *nhit, double *catCount) {
+ int *nhit, double *catCount, double *coefReg, int *flagReg) {
double catProportion[32], cp[32], cm[32];
int kcat[32];
@@ -391,6 +390,10 @@
/* If neither node is empty, check the split. */
if (rightDen > 1.0e-5 && leftDen > 1.0e-5) {
crit = (leftNum / leftDen) + (rightNum / rightDen);
+
+ //If regularization and not used, then regularize new
+ if(*flagReg==1)crit = (*coefReg) * crit;
+
if (crit > *critmax) {
*critmax = crit;
bestsplit = .5 * (catProportion[i] + catProportion[i + 1]);
diff -u RandomForestResouces/randomForest/src/rf.c RegulalizedRandormForestResouces/RRF/src/rf.c
--- RandomForestResouces/randomForest/src/rf.c 2012-01-06 20:15:31.000000000 +0600
+++ RegulalizedRandormForestResouces/RRF/src/rf.c 2012-04-09 06:22:33.000000000 +0600
@@ -1,6 +1,6 @@
/*****************************************************************
Copyright (C) 2001-9 Leo Breiman, Adele Cutler and Merck & Co., Inc.
-
+
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
@@ -19,6 +19,8 @@
Re-written from the original main program in Fortran.
Andy Liaw Feb. 7, 2002.
Modifications to get the forest out Matt Wiener Feb. 26, 2002.
+
+Extented to regularized random forest by Houtao Deng Oct. 27, 2011
*****************************************************************/
#include <R.h>
@@ -44,7 +46,7 @@
int *nodeclass, double *xbestsplit, double *errtr,
int *testdat, double *xts, int *clts, int *nts, double *countts,
int *outclts, int *labelts, double *proxts, double *errts,
- int *inbag) {
+ int *inbag, double *coefReg, int *flagReg0) {
/******************************************************************
* C wrapper for random forests: get input from R and drive
* the Fortran routines.
@@ -75,6 +77,8 @@
* nodesize: minimum node size: no node with fewer than ndsize
* cases will be split
*
+ * coefReg: coefficient for the regularized gain
+ * flagReg: 1=regularization; 0: without regularization
* Output:
*
* outcl: class predicted by RF
@@ -90,13 +94,16 @@
int jb, j, n, m, k, idxByNnode, idxByNsample, imp, localImp, iprox,
oobprox, keepf, replace, stratify, trace, *nright,
*nrightimp, *nout, *nclts, Ntree;
-
+ //double coefReg;//new
+ int flagReg;//new
int *out, *bestsplitnext, *bestsplit, *nodepop, *jin, *nodex,
*nodexts, *nodestart, *ta, *ncase, *jerr, *varUsed,
*jtr, *classFreq, *idmove, *jvr,
*at, *a, *b, *mind, *nind, *jts, *oobpair;
- int **strata_idx, *strata_size, last, ktmp, nEmpty, ntry;
-
+ int **strata_idx, *strata_size, last, ktmp, anyEmpty, ntry;
+
+ int *varUsedAll; // the variable used in all previous splits
+
double av=0.0;
double *tgini, *tx, *wl, *classpop, *tclasscat, *tclasspop, *win,
@@ -119,6 +126,14 @@
Ntree = *ntree;
mtry = *nvar;
ntest = *nts;
+ flagReg = *flagReg0;
+ //coefReg[0]=0.068;
+
+ //zeroInt(varDebug, mdim);
+ //varUsedAll[1]=10;//debug new
+ //varDebug=100;
+ //printf("%d\n",varDebug);
+
nsample = addClass ? (nsample0 + nsample0) : nsample0;
mimp = imp ? mdim : 1;
nimp = imp ? nsample : 1;
@@ -147,6 +162,7 @@
ncase = (int *) S_alloc(nsample, sizeof(int));
jerr = (int *) S_alloc(nsample, sizeof(int));
varUsed = (int *) S_alloc(mdim, sizeof(int));
+ varUsedAll = (int *) S_alloc(mdim, sizeof(int));
jtr = (int *) S_alloc(nsample, sizeof(int));
jvr = (int *) S_alloc(nsample, sizeof(int));
classFreq = (int *) S_alloc(nclass, sizeof(int));
@@ -162,7 +178,9 @@
if (oobprox) {
oobpair = (int *) S_alloc(near*near, sizeof(int));
}
-
+
+ zeroInt(varUsedAll, mdim);
+
/* Count number of cases in each class. */
zeroInt(classFreq, nclass);
for (n = 0; n < nsample; ++n) classFreq[cl[n] - 1] ++;
@@ -192,7 +210,8 @@
} else {
nind = replace ? NULL : (int *) S_alloc(nsample, sizeof(int));
}
-
+
+
/* INITIALIZE FOR RUN */
if (*testdat) zeroDouble(countts, ntest * nclass);
zeroInt(counttr, nclass * nsample);
@@ -233,16 +252,22 @@
}
idxByNnode = 0;
idxByNsample = 0;
+
+
+ //zeroInt(varUsedAll, mdim); //initialize the regularization vector new
+
+ //varDebug[19] = flagReg;
+
for (jb = 0; jb < Ntree; jb++) {
/* Do we need to simulate data for the second class? */
- if (addClass) createClass(x, nsample0, nsample, mdim);
+ if (addClass) createClass(x, nsample0, nsample, mdim);
do {
zeroInt(nodestatus + idxByNnode, *nrnodes);
zeroInt(treemap + 2*idxByNnode, 2 * *nrnodes);
zeroDouble(xbestsplit + idxByNnode, *nrnodes);
- zeroInt(nodeclass + idxByNnode, *nrnodes);
+ zeroInt(nodeclass + idxByNnode, *nrnodes);
zeroInt(varUsed, mdim);
- /* TODO: Put all sampling code into a function. */
+ /* TODO: Put all sampling code into a function. */
/* drawSample(sampsize, nsample, ); */
if (stratify) { /* stratified sampling */
zeroInt(jin, nsample);
@@ -280,9 +305,9 @@
}
}
} else { /* unstratified sampling */
+ anyEmpty = 0;
ntry = 0;
do {
- nEmpty = 0;
zeroInt(jin, nsample);
zeroDouble(tclasspop, nclass);
zeroDouble(win, nsample);
@@ -308,12 +333,10 @@
}
/* check if any class is missing in the sample */
for (n = 0; n < nclass; ++n) {
- if (tclasspop[n] == 0.0) nEmpty++;
+ if (tclasspop[n] == 0) anyEmpty = 1;
}
ntry++;
- } while (nclass - nEmpty < 2 && ntry <= 30);
- /* If there are still fewer than two classes in the data, throw an error. */
- if (nclass - nEmpty < 2) error("Still have fewer than two classes in the in-bag sample after 30 attempts.");
+ } while (anyEmpty && ntry <= 10);
}
/* If need to keep indices of inbag data, do that here. */
@@ -322,11 +345,17 @@
inbag[n + idxByNsample] = jin[n];
}
}
+
+ //int debug;for (n = 0; n < 10000000000; n++) debug =1;
+
/* Copy the original a matrix back. */
memcpy(a, at, sizeof(int) * mdim * nsample);
modA(a, &nuse, nsample, mdim, cat, *maxcat, ncase, jin);
-
+ //varUsedAll[1]=1000; //debug new
+ //varDebug[1]=1000;
+ //varDebug[20] = flagReg;
+ //varDebug[21] = &flagReg;
F77_CALL(buildtree)(a, b, cl, cat, maxcat, &mdim, &nsample,
&nclass,
treemap + 2*idxByNnode, bestvar + idxByNnode,
@@ -334,16 +363,18 @@
nodestatus + idxByNnode, nodepop,
nodestart, classpop, tclasspop, tclasscat,
ta, nrnodes, idmove, &ndsize, ncase,
- &mtry, varUsed, nodeclass + idxByNnode,
+ &mtry, varUsed,nodeclass + idxByNnode,
ndbigtree + jb, win, wr, wl, &mdim,
- &nuse, mind);
+ &nuse, mind,varUsedAll,coefReg, &flagReg);
/* if the "tree" has only the root node, start over */
} while (ndbigtree[jb] == 1);
+ //int debug;for (n = 0; n < 10000000000; n++) debug =1;
+
Xtranslate(x, mdim, *nrnodes, nsample, bestvar + idxByNnode,
bestsplit, bestsplitnext, xbestsplit + idxByNnode,
nodestatus + idxByNnode, cat, ndbigtree[jb]);
-
+
/* Get test set error */
if (*testdat) {
predictClassTree(xts, ntest, mdim, treemap + 2*idxByNnode,
@@ -548,7 +579,8 @@
double *pid, double *cutoff, double *countts, int *treemap,
int *nodestatus, int *cat, int *nodeclass, int *jts,
int *jet, int *bestvar, int *node, int *treeSize,
- int *keepPred, int *prox, double *proxMat, int *nodes) {
+ int *keepPred, int *prox, double *proxMat, int *nodes,
+ double *coefReg, int *flagReg) {
int j, n, n1, n2, idxNodes, offset1, offset2, *junk, ntie;
double crit, cmax;
@@ -557,6 +589,7 @@
offset1 = 0;
offset2 = 0;
junk = NULL;
+
for (j = 0; j < *ntree; ++j) {
/* predict by the j-th tree */
@@ -587,12 +620,11 @@
if (crit > cmax) {
jet[n] = j + 1;
cmax = crit;
- ntie = 1;
}
/* Break ties at random: */
if (crit == cmax) {
- if (unif_rand() < 1.0 / ntie) jet[n] = j + 1;
ntie++;
+ if (unif_rand() > 1.0 / ntie) jet[n] = j + 1;
}
}
}
@@ -633,23 +665,22 @@
smaxtr = 0.0;
ntie = 1;
for (j = 0; j < nclass; ++j) {
- qq = (((double) counttr[j + n*nclass]) / out[n]) / cutoff[j];
- if (j+1 != cl[n]) smax = (qq > smax) ? qq : smax;
- /* if vote / cutoff is larger than current max, re-set max and
- change predicted class to the current class */
- if (qq > smaxtr) {
- smaxtr = qq;
- jest[n] = j+1;
- ntie = 1;
- }
- /* break tie at random */
- if (qq == smaxtr) {
- if (unif_rand() < 1.0 / ntie) {
- smaxtr = qq;
- jest[n] = j+1;
- }
- ntie++;
- }
+ qq = (((double) counttr[j + n*nclass]) / out[n]) / cutoff[j];
+ if (j+1 != cl[n]) smax = (qq > smax) ? qq : smax;
+ /* if vote / cutoff is larger than current max, re-set max and
+ change predicted class to the current class */
+ if (qq > smaxtr) {
+ smaxtr = qq;
+ jest[n] = j+1;
+ }
+ /* break tie at random */
+ if (qq == smaxtr) {
+ ntie++;
+ if (unif_rand() > 1.0 / ntie) {
+ smaxtr = qq;
+ jest[n] = j+1;
+ }
+ }
}
if (jest[n] != cl[n]) {
errtr[cl[n]] += 1.0;
@@ -680,15 +711,14 @@
if (crit > cmax) {
jet[n] = j+1;
cmax = crit;
- ntie = 1;
}
/* Break ties at random: */
if (crit == cmax) {
- if (unif_rand() < 1.0 / ntie) {
+ ntie++;
+ if (unif_rand() > 1.0 / ntie) {
jet[n] = j+1;
cmax = crit;
}
- ntie++;
}
}
}
diff -u RandomForestResouces/randomForest/src/rf.h RegulalizedRandormForestResouces/RRF/src/rf.h
--- RandomForestResouces/randomForest/src/rf.h 2012-01-06 20:15:32.000000000 +0600
+++ RegulalizedRandormForestResouces/RRF/src/rf.h 2012-04-09 06:22:33.000000000 +0600
@@ -38,7 +38,7 @@
double *pid, double *cutoff, double *countts, int *treemap,
int *nodestatus, int *cat, int *nodeclass, int *jts,
int *jet, int *bestvar, int *nodexts, int *ndbigtree,
- int *keepPred, int *prox, double *proxmatrix, int *nodes);
+ int *keepPred, int *prox, double *proxmatrix, int *nodes, double *coefReg, int *flagReg);
void regTree(double *x, double *y, int mdim, int nsample,
int *lDaughter, int *rDaughter, double *upper, double *avnode,
@@ -92,7 +92,7 @@
int *ta, int *nrnodes, int *,
int *, int *, int *, int *, int *, int *,
double *, double *, double *,
- int *, int *, int *);
+ int *, int *, int *,int *varUsedAll, double *coefReg, int *flagReg);
/* Node status */
#define NODE_TERMINAL -1
diff -u RandomForestResouces/randomForest/src/rfsub.f RegulalizedRandormForestResouces/RRF/src/rfsub.f
--- RandomForestResouces/randomForest/src/rfsub.f 2012-01-06 20:15:32.000000000 +0600
+++ RegulalizedRandormForestResouces/RRF/src/rfsub.f 2012-04-09 06:22:33.000000000 +0600
@@ -21,7 +21,8 @@
1 nclass, treemap, bestvar, bestsplit, bestsplitnext, tgini,
1 nodestatus,nodepop, nodestart, classpop, tclasspop,
1 tclasscat,ta,nrnodes, idmove, ndsize, ncase, mtry, iv,
- 1 nodeclass, ndbigtree, win, wr, wl, mred, nuse, mind)
+ 1 nodeclass, ndbigtree, win, wr, wl, mred, nuse, mind,
+ 1 varUsedAll,coefReg, flagReg)
c Buildtree consists of repeated calls to two subroutines, Findbestsplit
c and Movedata. Findbestsplit does just that--it finds the best split of
@@ -45,12 +46,12 @@
1 nodepop(nrnodes), nodestart(nrnodes),
1 bestsplitnext(nrnodes), idmove(nsample),
1 ncase(nsample), b(mdim,nsample),
- 1 iv(mred), nodeclass(nrnodes), mind(mred)
-
+ 1 iv(mred), nodeclass(nrnodes), mind(mred),varUsedAll(mred)
+
double precision tclasspop(nclass), classpop(nclass, nrnodes),
1 tclasscat(nclass, 32), win(nsample), wr(nclass),
- 1 wl(nclass), tgini(mdim), xrand
- integer msplit, ntie
+ 1 wl(nclass), tgini(mdim), xrand, coefReg(mred)
+ integer msplit, ntie, flagReg
msplit = 0
call zerv(nodestatus,nrnodes)
@@ -58,6 +59,9 @@
call zerv(nodepop,nrnodes)
call zermr(classpop,nclass,nrnodes)
+
+
+
do j=1,nclass
classpop(j, 1) = tclasspop(j)
end do
@@ -81,15 +85,24 @@
call findbestsplit(a,b,cl,mdim,nsample,nclass,cat,maxcat,
1 ndstart, ndend,tclasspop,tclasscat,msplit, decsplit,
- 1 nbest,ncase, jstat,mtry,win,wr,wl,mred,mind)
+ 1 nbest,ncase, jstat,mtry,win,wr,wl,mred,mind,
+ 1 varUsedAll,coefReg, flagReg)
c call intpr("jstat", 5, jstat, 1)
c call intpr("msplit", 6, msplit, 1)
c If the node is terminal, move on. Otherwise, split.
+
if (jstat .eq. -1) then
nodestatus(kbuild) = -1
goto 30
else
bestvar(kbuild) = msplit
+c varDebug(11) = 1000
+c varDebug(12) = flagReg
+c mark the variable being splitted as 1; new
+ if (flagReg .eq. 1) then
+ varUsedAll(msplit) = 1
+c varDebug(10) = msplit
+ endif
iv(msplit) = 1
if (decsplit .lt. 0.0) decsplit = 0.0
tgini(msplit) = tgini(msplit) + decsplit
@@ -158,7 +171,6 @@
c form prediction in terminal nodes
do kn = 1, ndbigtree
-
if (nodestatus(kn) .eq. -1) then
pp = 0
ntie = 1
@@ -166,17 +178,16 @@
if (classpop(j,kn) .gt. pp) then
nodeclass(kn) = j
pp = classpop(j,kn)
- ntie = 1
end if
c Break ties at random:
if (classpop(j,kn) .eq. pp) then
+ ntie = ntie + 1
call rrand(xrand)
if (xrand .lt. 1.0 / ntie) then
nodeclass(kn)=j
pp=classpop(j,kn)
end if
- ntie = ntie + 1
- end if
+ end if
end do
end if
c call intpr("node", 4, kn, 1)
@@ -197,13 +208,15 @@
subroutine findbestsplit(a, b, cl, mdim, nsample, nclass, cat,
1 maxcat, ndstart, ndend, tclasspop, tclasscat, msplit,
2 decsplit, nbest, ncase, jstat, mtry, win, wr, wl,
- 3 mred, mind)
+ 3 mred, mind,varUsedAll,coefReg, flagReg)
implicit double precision(a-h,o-z)
integer a(mdim,nsample), cl(nsample), cat(mdim),
- 1 ncase(nsample), b(mdim,nsample), nn, j
+ 1 ncase(nsample), b(mdim,nsample), nn, j, tempFlagReg, debug
double precision tclasspop(nclass), tclasscat(nclass,32), dn(32),
- 1 win(nsample), wr(nclass), wl(nclass), xrand
- integer mind(mred), ncmax, ncsplit,nhit, ntie
+ 1 win(nsample), wr(nclass), wl(nclass), xrand, coefReg(mred)
+ integer mind(mred), ncmax, ncsplit,nhit, ntie
+ integer flagReg,mtryCounter , varUsedAll(mred)
+ double precision tempCoefReg
ncmax = 10
ncsplit = 512
c compute initial values of numerator and denominator of Gini
@@ -215,6 +228,7 @@
end do
crit0 = pno / pdo
jstat = 0
+
c start main loop through variables to find best split
critmax = -1.0e25
@@ -222,8 +236,10 @@
mind(k) = k
end do
nn = mred
+ mtryCounter=0
c sampling mtry variables w/o replacement.
- do mt = 1, mtry
+c do mt = 1, mtry new use mred instead of mtry to include the variables new
+ do mt = 1, mred
call rrand(xrand)
j = int(nn * xrand) + 1
mvar = mind(j)
@@ -231,6 +247,15 @@
mind(nn) = mvar
nn = nn - 1
lcat = cat(mvar)
+c if already use mtry random variables, then next; if not then plus1 new
+ if (varUsedAll(mvar).eq.0 .and. mtryCounter .ge. mtry) then
+ CYCLE
+ end if
+c when this variable not selected yet or flagReg=0 so that the indicator variable is always 0
+ if (varUsedAll(mvar).eq.0) then
+ mtryCounter = mtryCounter + 1
+ end if
+
if (lcat .eq. 1) then
c Split on a numerical predictor.
rrn = pno
@@ -256,22 +281,30 @@
c If neither nodes is empty, check the split.
if (dmin1(rrd, rld) .gt. 1.0e-5) then
crit = (rln / rld) + (rrn / rrd)
+c If regularization and not used, then regularize new
+c varDebug(3) = flagReg
+c varDebug(4) = varUsedAll(mvar)
+c varDebug(5) = mvar
+ if(flagReg.eq.1 .and. varUsedAll(mvar).eq.0)then
+c varDebug(5) = 9999
+ crit = coefReg(mvar)*crit
+ debug = 1
+ end if
if (crit .gt. critmax) then
nbest = nsp
critmax = crit
msplit = mvar
- ntie = 1
end if
c Break ties at random:
if (crit .eq. critmax) then
+ ntie = ntie + 1
call rrand(xrand)
if (xrand .lt. 1.0 / ntie) then
nbest = nsp
critmax = crit
msplit = mvar
end if
- ntie = ntie + 1
- end if
+ end if
end if
end if
end do
@@ -292,14 +325,23 @@
dn(i) = su
if(su .gt. 0) nnz = nnz + 1
end do
- nhit = 0
+ nhit = 0
+c If regularization and not used, then regularize new
+ tempFlagReg=0
+ tempCoefReg=0
+ if(flagReg.eq.1 .and. varUsedAll(mvar).eq.0) then
+ tempFlagReg = 1
+ tempCoefReg = coefReg(mvar)
+ end if
if (nnz .gt. 1) then
if (nclass .eq. 2 .and. lcat .gt. ncmax) then
call catmaxb(pdo, tclasscat, tclasspop, nclass,
- & lcat, nbest, critmax, nhit, dn)
+ & lcat, nbest, critmax, nhit, dn,tempCoefReg,
+ & tempFlagReg)
else
call catmax(pdo, tclasscat, tclasspop, nclass, lcat,
- & nbest, critmax, nhit, maxcat, ncmax, ncsplit)
+ & nbest, critmax, nhit, maxcat, ncmax, ncsplit,
+ & tempCoefReg, tempFlagReg)
end if
if (nhit .eq. 1) msplit = mvar
c else
Parent Issue #11
Outline the algorithm described in the the paper Feature Selection via Regularized Trees. Also examine the R packages the authors described and find out how useful it can be for us.