public FrmCategoricalSplitProperties(Node _node, Predictor _pred)
        {
            // Builds the dialog for a categorical split: the values sent to the left child are listed
            // in lbLeft, the values sent to the right child in lbRight.
            node = _node;
            pred = _pred;

            InitializeComponent();

            if (pred.ChildrenGroups != null) {
                // A split already exists: mirror its two value groups in the list boxes.
                foreach (string leftValue in pred.ChildrenGroups.ValueGroupLst[0]) {
                    lbLeft.Items.Add(leftValue);
                }

                foreach (string rightValue in pred.ChildrenGroups.ValueGroupLst[1]) {
                    lbRight.Items.Add(rightValue);
                }

                checks();
                return;
            }

            // No split could be defined: warn the user, attach an empty two-group container to the
            // predictor and show the 1st value on the left and all the remaining ones on the right.
            MessageBox.Show("This split has not been computed", "Warning!");
            pred.ChildrenGroups = new ValueGroup(_pred, 2);

            lbLeft.Items.Add(pred.ValueSd.Keys[0]);
            int valueIdx = 1;
            while (valueIdx < pred.DistinctValuesCount) {
                lbRight.Items.Add(pred.ValueSd.Keys[valueIdx]);
                ++valueIdx;
            }

            checks();
        }
Ejemplo n.º 2
0
 public ValueGroup(Predictor _pred, int _groupCount)
 {
     // Remember the owning predictor and how many groups were requested.
     pred = _pred;
     groupCount = _groupCount;

     // Pre-size the outer list, then give every group its own empty list of values.
     ValueGroupLst = new List<List<string>>(groupCount);
     int remaining = groupCount;
     while (remaining > 0) {
         ValueGroupLst.Add(new List<string>());
         --remaining;
     }
 }
Ejemplo n.º 3
0
        /// <summary>
        /// Greedy multivariate split search: looks for a local minimum of the split impurity by
        /// fixing all the coefficients except one and then varying that one.
        /// </summary>
        /// <remarks>
        /// Starts from the node's best univariate split (<c>n.BestSplit</c>), then repeatedly tries every
        /// not-yet-optimised entry of <c>n.PredMvLst</c> and keeps the one that lowers the impurity most.
        /// Writes <c>n.C</c>, the <c>Coef</c>/<c>Optimised</c> members of <c>n.PredMvLst</c>,
        /// <c>n.ImpBestMvSplit</c> and <c>p.Gain</c>, and appends a trace to <c>Def.LogMessage</c>.
        /// When <c>Def.ExperimentRunning</c> is false it also shows a <c>FrmGraph</c> plot of the first two
        /// multivariate predictors (when at least two exist) and calls <c>DataValidate</c>.
        /// </remarks>
        /// <param name="n">Node being split; supplies the predictors and receives the result.</param>
        /// <param name="p">Predictor whose <c>Gain</c> is set and whose <c>NullCount</c> scales the logged gains.</param>
        /// <returns>
        /// <c>p.Gain</c> (set to <c>n.BestSplit.Gain</c>), or -1 when the selected index is unexpectedly
        /// missing from the list of non-optimised variables.
        /// </returns>
        public static double MinImpMvGreed(NodeTargetCategorical n, Predictor p)
        {
            bool hadOverallImprovement;
            List<int> nonOptimisedVariableLst = new List<int>(); // Indexes of n.PredMvLst still to be tried
            List<int> optimisedVariableLst = new List<int>();    // Indexes of n.PredMvLst whose coefficient is already fixed
            n.C = 0; // Constant of the equation A1X1 + A2X2 + C = V
            double lowestImp = n.ImpBestUniSplit; // starts with the impurity of the best univariate split
            double bestCoef = -1;
            double bestMvGain;
            double bestUniGain;
            int bestSplitIdx = -1;

            // NOTE(review): the form is created even when Def.ExperimentRunning is true (it is passed to
            // BestGainMvNumeric) and is never disposed in that case - confirm whether that is intended.
            FrmGraph fg = new FrmGraph();

            bestUniGain = (n.Imp - n.ImpBestUniSplit) * 100 / n.Imp;
            bestUniGain *= (double)(n.Table.RowCount - p.NullCount) / n.Table.RowCount;

            Def.LogMessage += "Starting MinImpMvGreed, smallest univariate imp= " + lowestImp + " bestUniSplit gain " + n.BestSplit.Gain + " should be equal bestUniGain " + bestUniGain + Environment.NewLine;

            // Every multivariate predictor starts as "not optimised" with the absent coefficient.
            for (int i = 0; i < n.PredMvLst.Count; ++i) {
                n.PredMvLst[i].Optimised = false;
                n.PredMvLst[i].Coef = Def.AbsentCoefficientValue;
                nonOptimisedVariableLst.Add(i);
            }

            if (n.BestSplit.Variable.VariableTypeUserSet == SchemaVariable.VariableTypeEnum.Continuous) {
                // Seeds the search with the best univariate (continuous) split: coefficient 1 and the
                // constant derived from the split value via Fcn.NormP1.
                nonOptimisedVariableLst.Remove(n.BestSplit.PredMvBase);
                optimisedVariableLst.Add(n.BestSplit.PredMvBase);
                n.PredMvLst[n.BestSplit.PredMvBase].Coef = 1;
                n.PredMvLst[n.BestSplit.PredMvBase].Optimised = true;
                n.C = -Fcn.NormP1(n.BestSplit.SplitValue, n.BestSplit.LowerNumber, n.BestSplit.HigherNumber);
            } else { // Nominal
                List<string> caseLst;
                n.C = 0;
                // Sets the coefficients of the values on the left with Def.PresentCoefficientValue
                caseLst = n.PredictorLst[n.BestSplit.PredictorLstIdx].ChildrenGroups.ValueGroupLst[0];
                for (int i = 0; i < caseLst.Count; ++i) {
                    // NOTE(review): the original author was unsure about this indexing (base + position of
                    // the value inside ValueSd) - confirm against how PredMvLst is built.
                    n.PredMvLst[n.BestSplit.PredMvBase + n.PredictorLst[n.BestSplit.PredictorLstIdx].ValueSd.IndexOfKey(caseLst[i])].Coef = Def.PresentCoefficientValue;
                }
                // If the best split is nominal, all the columns that describe it are already optimised
                for (int i = 0; i < n.PredictorLst[n.BestSplit.PredictorLstIdx].DistinctValuesCount; ++i) {
                    nonOptimisedVariableLst.Remove(n.BestSplit.PredMvBase + i);
                    optimisedVariableLst.Add(n.BestSplit.PredMvBase + i);
                    n.PredMvLst[n.BestSplit.PredMvBase + i].Optimised = true;
                }
            }

            double[] r = new double[2]; // r[0] = coefficient, r[1] = split impurity of the last candidate tried

            while (nonOptimisedVariableLst.Count > 0) {
                hadOverallImprovement = false;
                for (int nopVarIdx = 0; nopVarIdx < nonOptimisedVariableLst.Count; ++nopVarIdx) {
                    int candidateIdx = nonOptimisedVariableLst[nopVarIdx];

                    // Categorical and numeric candidates are evaluated by different helpers, but the
                    // "keep the best so far" bookkeeping is the same for both.
                    if (n.PredMvLst[candidateIdx].Variable.VariableTypeUserSet == SchemaVariable.VariableTypeEnum.Categorical) {
                        r = BestGainMvCategorical(n, n.C, candidateIdx, optimisedVariableLst);
                    } else {
                        r = BestGainMvNumeric(n, n.C, candidateIdx, optimisedVariableLst, fg);
                    }

                    if (r[1] < lowestImp) {
                        bestCoef = r[0];
                        bestSplitIdx = candidateIdx;
                        lowestImp = r[1];
                        hadOverallImprovement = true;
                    }

                    if (n.PredMvLst[candidateIdx].FieldSpan > 0) {
                        Def.LogMessage += "     nopVarIdx " + candidateIdx + " Name " + n.PredMvLst[candidateIdx].Variable.Name + "(" + n.PredMvLst[candidateIdx].Offset + ")";
                    } else {
                        Def.LogMessage += "     nopVarIdx " + candidateIdx + " Name " + n.PredMvLst[candidateIdx].Variable.Name;
                    }
                    Def.LogMessage += "     Coefficient " + r[0] + " SplitImp " + r[1] + Environment.NewLine;
                }

                if (hadOverallImprovement) {
                    if (!nonOptimisedVariableLst.Contains(bestSplitIdx)) {
                        Def.LogMessage += "Error couldn't find bestSplitIdx " + bestSplitIdx + " inside nonOptimisedVariableLst " + Environment.NewLine;
                        MessageBox.Show("Error couldn't find bestSplitIdx " + bestSplitIdx + " inside nonOptimisedVariableLst ");
                        return -1;
                    }
                    // Freezes the winning coefficient and moves the variable to the optimised set.
                    n.PredMvLst[bestSplitIdx].Coef = bestCoef;
                    n.PredMvLst[bestSplitIdx].Optimised = true;
                    nonOptimisedVariableLst.Remove(bestSplitIdx);
                    optimisedVariableLst.Add(bestSplitIdx);
                    Def.LogMessage += "Best variable " + n.PredMvLst[bestSplitIdx].Variable.Name + n.PredMvLst[bestSplitIdx].Offset + " Coef " + bestCoef + " lowestCatSplitImp " + lowestImp + "++++++++++++++++++++" + Environment.NewLine;
                } else { // No improvement: mark everything that is left as optimised, which ends the search
                    Def.LogMessage += "No more improvement " + (optimisedVariableLst.Count) + " variables of " + n.PredMvLst.Count + " are being combined on the purity function -------------------" + Environment.NewLine;
                    foreach (int predMvIdx in nonOptimisedVariableLst) {
                        optimisedVariableLst.Add(predMvIdx);
                        n.PredMvLst[predMvIdx].Optimised = true;
                    }
                    nonOptimisedVariableLst.Clear();
                }

                if (lowestImp == 0) {
                    Def.LogMessage += "Node is now 100% pure stopping greed seach " + Environment.NewLine;
                    break;
                }
            }
            n.ImpBestMvSplit = lowestImp;

            p.Gain = n.BestSplit.Gain;

            bestMvGain = (n.Imp - n.ImpBestMvSplit) * 100 / n.Imp;
            bestMvGain *= (double)(n.Table.RowCount - p.NullCount) / n.Table.RowCount;

            bestUniGain = (n.Imp - n.ImpBestUniSplit) * 100 / n.Imp;
            bestUniGain *= (double)(n.Table.RowCount - p.NullCount) / n.Table.RowCount;

            string coefStr = "";
            for (int i = 0; i < n.PredMvLst.Count; ++i) {
                if (n.PredMvLst[i].Variable.VariableTypeUserSet == SchemaVariable.VariableTypeEnum.Continuous) {
                    coefStr += n.PredMvLst[i].Coef + "(" + n.PredMvLst[i].Variable.Name + ") ";
                } else {
                    coefStr += n.PredMvLst[i].Coef + "(" + n.PredMvLst[i].Variable.Name + n.PredMvLst[i].Offset + ") ";
                }
            }
            coefStr += "c = " + n.C;
            // Open question from the original author: why is UniGain sometimes different from p.Gain?
            Def.LogMessage += "MvGain: " + bestMvGain + " UniGain: " + bestUniGain + " P.Gain: " + p.Gain + " MvImp: " + lowestImp + " UniImp: " + n.ImpBestUniSplit + " NdImp: " + n.Imp + Environment.NewLine + coefStr + Environment.NewLine;

            if (!Def.ExperimentRunning) {
                // The plot uses the first two multivariate predictors as its axes, so it can only be
                // drawn when both exist; indexing them unconditionally used to throw otherwise.
                if (n.PredMvLst.Count >= 2) {
                    for (int y = 0; y < n.Table.RowCount; ++y) {
                        fg.x0Lst.Add((float) n.PredMvLst[0].X(y));
                        fg.x1Lst.Add((float) n.PredMvLst[1].X(y));
                        if (n.MvTb.Data.TC[y].ToLower() == "n") {
                            fg.n.Add(y);
                        }
                    }
                    fg.a0 = (float) n.PredMvLst[0].Coef;
                    fg.a1 = (float) n.PredMvLst[1].Coef;
                    fg.c = (float) n.C;
                    fg.N = n;
                    fg.ABest = (float) bestCoef;
                    fg.Show();
                    fg.Invalidate();
                } else {
                    Def.LogMessage += "MinImpMvGreed: fewer than two multivariate predictors, plot skipped" + Environment.NewLine;
                }

                DataValidate(n, n.C);
            }
            return p.Gain;
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Searches the threshold on the continuous predictor <paramref name="p"/> that gives the lowest
        /// weighted impurity for node <paramref name="n"/>. Sets the best p.SplitValue and p.Gain.
        /// </summary>
        /// <remarks>
        /// On success also sets <c>p.SplitStatus</c>, <c>p.ImpUniMin</c> and the two entries of
        /// <c>n.DescendentImpPreCalculated</c> (left and right impurity of the chosen threshold).
        /// </remarks>
        /// <param name="n">Node whose rows (table <c>Def.DbTrTb + n.Id</c>) are being split.</param>
        /// <param name="p">Continuous predictor to evaluate; receives the result.</param>
        /// <returns>
        /// The gain stored in <c>p.Gain</c>, or 0 (with <c>SplitStatus = NotEnoughCases</c>) when no
        /// threshold leaves at least <c>Def.TreeMinNumberOfCasesPerNode</c> rows on both sides.
        /// </returns>
        public static double MinImpCont(NodeTargetCategorical n, Predictor p)
        {
            int i, leftRowCount = 0, rightRowCount = 0;
            double minImp, imp, lImp, rImp;
            lImp = rImp = minImp = imp = double.NaN;
            List<N3T> AvcLst;
            List<int> thresholdIndexLst;

            //AvcLst[i].N0 = Value of the predictor
            //AvcLst[i].N1 = Frequency of that (value, target) pair
            //AvcLst[i].N2 = Running total of rows up to and including that row (filled in below)
            //AvcLst[i].T  = Target value

            // NOTE(review): the query is assembled from schema identifiers (table/column names), which
            // cannot be bound as parameters - confirm these names never come from untrusted input.
            string sql =
            @"SELECT ALL " +
                Def.DbBsTb + "." + p.Variable.Name + ", " +
                " count(" + Def.DbBsTb + "." + p.Variable.Name + "),0 , " +
                Def.DbBsTb + "." + Def.Schema.Target.Name + " " +
            "FROM "
                + Def.DbBsTb + "," + Def.DbTrTb + n.Id + " " +
            "WHERE "
                + Def.DbBsTb + "." + Def.DbTableIdName + " = " +
                Def.DbTrTb + n.Id + "." + Def.DbTableIdName + " " +
                " AND " + Def.DbBsTb + "." + p.Variable.Name + " IS NOT NULL " +
            "GROUP BY " +
                Def.DbBsTb + "." + Def.Schema.Target.Name + ", " +
                Def.DbBsTb + "." + p.Variable.Name + " " +
            "ORDER BY " +
                Def.DbBsTb + "." + p.Variable.Name;
            AvcLst = Def.Db.GetN3TLst(sql);

            //N2 is the number of rows up to a given row
            if (AvcLst.Count > 0) {
                AvcLst[0].N2 = AvcLst[0].N1;
                for (i = 1; i < AvcLst.Count; ++i) {
                    AvcLst[i].N2 = AvcLst[i - 1].N2 + AvcLst[i].N1;
                }
            }

            thresholdIndexLst = Fcn.SetPossibleThresholdIndexLst(AvcLst);
            if (thresholdIndexLst.Count == 0) {
                p.SplitStatus = Predictor.SplitStatusEnum.NotEnoughCases;
                p.Gain = 0;
                return 0;
            }

            n.DescendentImpPreCalculated = new List<double>(2);
            n.DescendentImpPreCalculated.Add(0);
            n.DescendentImpPreCalculated.Add(0);

            p.SplitStatus = Predictor.SplitStatusEnum.CanBeUsed;
            if (AvcLst.Count == 0) {
                p.SplitStatus = Predictor.SplitStatusEnum.NotEnoughCases;
                p.Gain = 0;
                return 0;
            }

            //Tries each candidate threshold and keeps the one with the lowest weighted impurity
            for (i = 0; i < thresholdIndexLst.Count; ++i) {
                int thresholdIdx = thresholdIndexLst[i];
                lImp = ImpCont(0, thresholdIdx, AvcLst, out leftRowCount);
                rImp = ImpCont(thresholdIdx + 1, AvcLst.Count - 1, AvcLst, out rightRowCount);
                imp = (leftRowCount * lImp + rightRowCount * rImp) / (leftRowCount + rightRowCount);

                bool bothSidesBigEnough = leftRowCount >= Def.TreeMinNumberOfCasesPerNode
                                          && rightRowCount >= Def.TreeMinNumberOfCasesPerNode;
                // The first valid candidate is always taken (minImp is still NaN); afterwards only a
                // strictly lower impurity replaces the current best.
                bool isBetter = Double.IsNaN(minImp) || imp < minImp;

                if (bothSidesBigEnough && !Double.IsNaN(imp) && isBetter) {
                    minImp = imp;
                    p.SplitValue = AvcLst[thresholdIdx].N0;
                    n.DescendentImpPreCalculated[0] = lImp;
                    n.DescendentImpPreCalculated[1] = rImp;
                }
            }
            if (Double.IsNaN(minImp)) {
                p.SplitStatus = Predictor.SplitStatusEnum.NotEnoughCases;
                p.Gain = 0;
                return 0;
            }

            List<double> nLst;
            sql = @"
            SELECT
                DISTINCT " + p.Variable.Name + " " +
            "FROM "
                + Def.DbBsTb + " , " + Def.DbTrTb + n.Id + " " +
            "WHERE "
                + Def.DbBsTb + "." + Def.DbTableIdName + "=" +
                Def.DbTrTb + n.Id + "." + Def.DbTableIdName + " AND " +
                p.Variable.Name + " IS NOT NULL " +
            "ORDER BY "
                + p.Variable.Name + " ASC ";
            nLst = Def.Db.GetNumberLst(sql);

            //Moves the split value away from an observed value, towards the middle of its neighbours.
            //The Count guard keeps nLst[0] / nLst[Count - 2] inside the list when the query returns
            //fewer than two distinct values.
            //NOTE(review): the new value is the mean of the neighbours on either side
            //(nLst[i - 1], nLst[i + 1]), not of the split value and its successor - confirm this is intended.
            if (nLst.Count >= 2 && p.SplitValue != nLst[0] && p.SplitValue != nLst[nLst.Count - 2]) {
                for (i = 1; i < nLst.Count - 2; ++i) {
                    if (p.SplitValue == nLst[i]) {
                        p.SplitValue = (nLst[i - 1] + nLst[i + 1]) / 2;
                        break;
                    }
                }
            }

            p.ImpUniMin = minImp;
            //Gain is the relative impurity reduction in percent, scaled by the share of non-null rows
            p.Gain = (n.Imp - minImp) * 100 / n.Imp;
            p.Gain *= (double)(n.Table.RowCount - p.NullCount) / n.Table.RowCount;
            return p.Gain;
        }
Ejemplo n.º 5
0
        /// <summary>
        /// Progressive (greedy) search of a binary partition of the values of categorical predictor
        /// <paramref name="p"/> for node <paramref name="n"/>.
        /// </summary>
        /// <remarks>
        /// The 1st value starts on the left and the 2nd on the right; every further value is placed on
        /// the side that gives the lower impurity. Depending on <c>Def.ClfOptimisationLevelForCatSearch</c>
        /// two refinement phases follow: phase 1 (level &gt;= 1) retries the placement of the two seed
        /// values, phase 2 (level &gt;= 2) tries to move each value to the opposite side.
        /// Writes <c>p.SplitStatus</c>, <c>p.ImpUniMin</c>, <c>p.Gain</c> and the two lists of
        /// <c>p.ChildrenGroups.ValueGroupLst</c> (0 = left child, 1 = right child).
        /// <c>fillPredVals</c> is used as the impurity evaluator of a left/right combination; a NaN
        /// result is treated as "this partition cannot be used".
        /// </remarks>
        /// <param name="n">Node whose rows (table <c>Def.DbTrTb + n.Id</c>) are being split.</param>
        /// <param name="p">Categorical predictor to evaluate; <c>p.ChildrenGroups</c> must already exist.</param>
        /// <returns>The gain stored in <c>p.Gain</c>, or 0 when no usable partition was found.</returns>
        public static double MinImpCatProgressive(NodeTargetCategorical n, Predictor p)
        {
            int valueCount = p.DistinctValuesCount;
            double impBest;
            int i;
            double imp = Double.NaN;
            double impIfValueGoesLeft, impIfValueGoesRight;
            List<NTT> nttLst;
            int improvementCode = 0;

            p.SplitStatus = Predictor.SplitStatusEnum.CanBeUsed;

            if (valueCount < 2) {
                p.SplitStatus = Predictor.SplitStatusEnum.OnlyOneValueAvailable;
                p.Gain = 0;
                return 0;
            }

            List<string> lComb = new List<string>(valueCount);
            List<string> rComb = new List<string>(valueCount);
            SortedList<string, int> lPredVal = new SortedList<string, int>();
            SortedList<string, int> rPredVal = new SortedList<string, int>();

            // NOTE(review): the query is assembled from schema identifiers (table/column names), which
            // cannot be bound as parameters - confirm these names never come from untrusted input.
            string sql =
            @"SELECT ALL " +
                " count(*), " +
                Def.DbBsTb + "." + p.Variable.Name + ",  " +
                Def.DbBsTb + "." + Def.Schema.Target.Name + " " +
            "FROM "
                + Def.DbBsTb + "," + Def.DbTrTb + n.Id + " " +
            "WHERE "
                + Def.DbBsTb + "." + Def.DbTableIdName + " = " +
                Def.DbTrTb + n.Id + "." + Def.DbTableIdName + " AND " +
                Def.DbBsTb + "." + p.Variable.Name + " IS NOT NULL " +
            "GROUP BY " +
                Def.DbBsTb + "." + Def.Schema.Target.Name + ", " +
                Def.DbBsTb + "." + p.Variable.Name + " ";
            nttLst = Def.Db.GetNTTLst(sql);

            lComb.Add(p.ValueSd.Keys[0]);  // Adds the 1st value to the combination of the left node
            rComb.Add(p.ValueSd.Keys[1]);  // Adds the 2nd value to the combination of the right node

            //With exactly two values there is a single possible partition
            if (valueCount == 2) {
                imp = fillPredVals(lPredVal, rPredVal, lComb, rComb, nttLst, n);
                p.ChildrenGroups.ValueGroupLst[0].Clear();
                p.ChildrenGroups.ValueGroupLst[1].Clear();
                if (Double.IsNaN(imp)) {
                    p.SplitStatus = Predictor.SplitStatusEnum.NotEnoughCases;
                    p.ImpUniMin = Double.NaN;
                    p.Gain = Double.NaN;
                    return 0;
                }
                p.ChildrenGroups.ValueGroupLst[0].AddRange(lComb);
                p.ChildrenGroups.ValueGroupLst[1].AddRange(rComb);
                // NOTE(review): this overwrites the node-level best univariate impurity from a single
                // predictor; kept as it was, confirm whether callers rely on it.
                n.ImpBestUniSplit = imp;
                // Keeps the predictor-level impurity in step with every other exit of this method.
                p.ImpUniMin = imp;
                p.Gain = (n.Imp - imp) * 100 / n.Imp;
                p.Gain *= (double)(n.Table.RowCount - p.NullCount) / n.Table.RowCount;
                return p.Gain;
            }

            //IF valueCount > 2: places every remaining value on the side with the lower impurity
            for (i = 2; i < valueCount; ++i) {
                //Tries the value on the left
                lComb.Add(p.ValueSd.Keys[i]);
                impIfValueGoesLeft = fillPredVals(lPredVal, rPredVal, lComb, rComb, nttLst, n);
                //Changes the side
                rComb.Add(p.ValueSd.Keys[i]);
                lComb.RemoveAt(lComb.Count - 1);
                impIfValueGoesRight = fillPredVals(lPredVal, rPredVal, lComb, rComb, nttLst, n);

                // The value is on the right at this point. It goes left when the right placement is
                // unusable (NaN) or when left is strictly better; a NaN on the left makes the comparison
                // false, so the value then stays on the right.
                bool goesLeft = Double.IsNaN(impIfValueGoesRight) || impIfValueGoesLeft < impIfValueGoesRight;
                if (goesLeft) {
                    imp = impIfValueGoesLeft;
                    lComb.Add(p.ValueSd.Keys[i]);
                    rComb.RemoveAt(rComb.Count - 1);
                } else {
                    imp = impIfValueGoesRight;
                }
            }

            impBest = imp;
            if (Def.ClfOptimisationLevelForCatSearch >= 1) {

                #region enhanced progressive phase 1

                // improvementCode: 0 = no improvement, 1/2/3 = the state below that gave the best impurity

                // Alternative placements for the two seed values
                // 1- Removes 1st val from left and puts it in the right
                // 2- Removes 2nd val from right and sends it to left
                // 3- Puts back the 1st val to left

                // 0) Initial status
                // left  0xxxxxx
                // right 1xxxxxx

                // 1)
                // left  xxxxxx
                // right 1xxxxxx0

                // 2)
                // left  xxxxxx1
                // right xxxxxx0

                // 3)
                // left  xxxxxx10
                // right xxxxxx

                // 1
                lComb.RemoveAt(0);
                rComb.Add(p.ValueSd.Keys[0]);
                imp = fillPredVals(lPredVal, rPredVal, lComb, rComb, nttLst, n);
                if (imp < impBest) {
                    improvementCode = 1;
                    impBest = imp;
                }

                // 2
                rComb.RemoveAt(0);
                lComb.Add(p.ValueSd.Keys[1]);
                imp = fillPredVals(lPredVal, rPredVal, lComb, rComb, nttLst, n);
                if (imp < impBest) {
                    improvementCode = 2;
                    impBest = imp;
                }

                // 3
                rComb.RemoveAt(rComb.Count - 1);
                lComb.Add(p.ValueSd.Keys[0]);
                imp = fillPredVals(lPredVal, rPredVal, lComb, rComb, nttLst, n);
                if (imp < impBest) {
                    improvementCode = 3;
                    impBest = imp;
                }

                // The lists are now in state 3 (left ends with "1 0"); rolls back to the winning state.
                switch (improvementCode) {
                    case 0:
                        // Back to the initial partition: the 2nd value returns to the right.
                        rComb.Add(lComb[lComb.Count - 2]);
                        lComb.RemoveAt(lComb.Count - 2);
                        break;
                    case 1:
                        // State 1: both seed values on the right.
                        lComb.RemoveAt(lComb.Count - 1);
                        lComb.RemoveAt(lComb.Count - 1);
                        rComb.Add(p.ValueSd.Keys[0]);
                        rComb.Add(p.ValueSd.Keys[1]);
                        break;
                    case 2:
                        // State 2: only the 1st value moves back to the right; the 2nd value is already
                        // the last element left on the left side. Removing the tail of rComb here would
                        // drop an unrelated value (or throw when rComb is empty), and adding the 2nd
                        // value again would list it twice.
                        lComb.RemoveAt(lComb.Count - 1);
                        rComb.Add(p.ValueSd.Keys[0]);
                        break;
                    default:
                        // State 3 is the current state: nothing to undo.
                        break;
                }

                #endregion  enhanced progressive phase 1

                #region enhanced progressive phase 2

                if (Def.ClfOptimisationLevelForCatSearch >= 2) {
                    // Tries to send each left value to the right; a move that does not improve the
                    // impurity is undone by putting the value back at the end of the left list.
                    int lCombCount = lComb.Count;
                    for (int lidx = 0; lidx < lCombCount; ++lidx) {
                        rComb.Add(lComb[0]);
                        lComb.RemoveAt(0);

                        imp = fillPredVals(lPredVal, rPredVal, lComb, rComb, nttLst, n);
                        if (imp < impBest) {
                            impBest = imp;
                        } else {
                            lComb.Add(rComb[rComb.Count - 1]);
                            rComb.RemoveAt(rComb.Count - 1);
                        }
                    }

                    // Same procedure in the opposite direction.
                    int rCombCount = rComb.Count;
                    for (int ridx = 0; ridx < rCombCount; ++ridx) {
                        lComb.Add(rComb[0]);
                        rComb.RemoveAt(0);

                        imp = fillPredVals(lPredVal, rPredVal, lComb, rComb, nttLst, n);
                        if (imp < impBest) {
                            impBest = imp;
                        } else {
                            rComb.Add(lComb[lComb.Count - 1]);
                            lComb.RemoveAt(lComb.Count - 1);
                        }
                    }
                }//If optimisation >=2
                #endregion phase 2
            }//If optimisation >=1

            p.ChildrenGroups.ValueGroupLst[0].Clear();
            p.ChildrenGroups.ValueGroupLst[1].Clear();

            if (Double.IsNaN(impBest)) {
                p.SplitStatus = Predictor.SplitStatusEnum.NotEnoughCases;
                p.ImpUniMin = Double.NaN;
                p.Gain = Double.NaN;
                return 0;
            }

            p.ChildrenGroups.ValueGroupLst[0].AddRange(lComb);
            p.ChildrenGroups.ValueGroupLst[1].AddRange(rComb);

            imp = impBest;
            p.ImpUniMin = imp;
            //Gain is the relative impurity reduction in percent, scaled by the share of non-null rows
            p.Gain = (n.Imp - imp) * 100 / n.Imp;
            p.Gain *= (double)(n.Table.RowCount - p.NullCount) / n.Table.RowCount;
            return p.Gain;
        }
Ejemplo n.º 6
0
        //It the number of variables is too large it can't compute
        //public static double KILLED_MinImpCatRandom(NodeTargetCategorical n, Predictor p) {
        //    //For some partitions gets the min Impiance
        //    int valueCount = p.DistinctValuesCount;
        //    int pos, instanceI;
        //    double partitionCount;
        //    int c, i;
        //    double bestPartition = 0, minImp = Double.NaN, imp = Double.NaN, lImp = Double.NaN, rImp = Double.NaN;
        //    int nodeLfItemCount, nodeRtItemCount;
        //    List<NTT> nttLst;
        //    string binStr = "";
        //    p.SplitStatus = Predictor.SplitStatusEnum.CanBeUsed;
        //    n.DescendentImpPreCalculated = new List<double>(2);
        //    n.DescendentImpPreCalculated.Add(0);
        //    n.DescendentImpPreCalculated.Add(0);
        //    List<string> lComb = new List<string>(valueCount);
        //    List<string> rComb = new List<string>(valueCount);
        //    SortedList<string, int> lPredVal = new SortedList<string, int>();
        //    SortedList<string, int> rPredVal = new SortedList<string, int>();
        //    string sql =
        //    @"SELECT ALL " +
        //        " count(*), " +
        //        Def.DbBsTb + "." + p.Variable.Name + ",  " +
        //        Def.DbBsTb + "." + Def.Schema.Target.Name + " " +
        //    "FROM "
        //        + Def.DbBsTb + "," + Def.DbTrTb + n.Id + " " +
        //    "WHERE "
        //        + Def.DbBsTb + "." + Def.DbTableIdName + " = " +
        //        Def.DbTrTb + n.Id + "." + Def.DbTableIdName + " AND " +
        //        Def.DbBsTb + "." + p.Variable.Name + " IS NOT NULL " +
        //    "GROUP BY " +
        //        Def.DbBsTb + "." + Def.Schema.Target.Name + ", " +
        //        Def.DbBsTb + "." + p.Variable.Name + " ";
        //    nttLst = Def.Db.GetNTTLst(sql);
        //    int instanceCount = n.Table.RowCount;
        //    partitionCount = Math.Pow(2, valueCount - 1) - 1;
        //    double partitionCountMax = 0;
        //    if (partitionCount > 4095) { // 4095
        //        partitionCountMax = 4095;
        //    } else
        //        partitionCountMax = partitionCount;
        //    List<double> partLst = new List<double>((int)partitionCountMax);
        //    i = 1;
        //    //CHECK
        //    for (int t = 0; t < partitionCountMax; ++t) {
        //        partLst.Add((int)RNG.GetUniform(i, partitionCount));
        //        ++i;
        //    }
        //    for (i = 0; i < partitionCountMax; ++i) {
        //        pos = 0;
        //        binStr = Fcn.Decimal2BinaryStr(partLst[(int)i]);
        //        lComb.Clear(); rComb.Clear();
        //        lPredVal.Clear(); rPredVal.Clear();
        //        nodeLfItemCount = nodeRtItemCount = 0;
        //        for (c = 0; c < p.ValueSd.Count; ++c) {
        //            if (binStr[binStr.Length - 1 - c] == '1')
        //                lComb.Add(p.ValueSd.Keys[pos]); //if the 'case' is in, put it on the left
        //            else
        //                rComb.Add(p.ValueSd.Keys[pos]);//else, in the right side
        //            ++pos;
        //        }
        //        for (instanceI = 0; instanceI < nttLst.Count; ++instanceI) {
        //            foreach (string ls in lComb) {
        //                if (nttLst[instanceI].T0 == ls) {
        //                    if (!lPredVal.ContainsKey(nttLst[instanceI].T1))
        //                        lPredVal.Add(nttLst[instanceI].T1, (int)nttLst[instanceI].N);
        //                    else
        //                        lPredVal[nttLst[instanceI].T1] += (int)nttLst[instanceI].N;
        //                    nodeLfItemCount += (int)nttLst[instanceI].N;
        //                    break;
        //                }
        //            }
        //            foreach (string rs in rComb) {
        //                if (nttLst[instanceI].T0 == rs) {
        //                    if (!rPredVal.ContainsKey(nttLst[instanceI].T1))
        //                        rPredVal.Add(nttLst[instanceI].T1, (int)nttLst[instanceI].N);
        //                    else
        //                        rPredVal[nttLst[instanceI].T1] += (int)nttLst[instanceI].N;
        //                    nodeRtItemCount += (int)nttLst[instanceI].N;
        //                    break;
        //                }
        //            }
        //        }
        //        if (nodeLfItemCount >= Def.TreeMinNumberOfCasesPerNode && nodeRtItemCount >= Def.TreeMinNumberOfCasesPerNode) {
        //            lImp = ImpCat(lPredVal, nodeLfItemCount);
        //            rImp = ImpCat(rPredVal, nodeRtItemCount);
        //            if (Double.IsNaN(minImp)) {
        //                bestPartition = partLst[(int)i];
        //                minImp = (double)(nodeLfItemCount) / instanceCount * lImp + (double)nodeRtItemCount / instanceCount * rImp;
        //                n.DescendentImpPreCalculated[0] = lImp;
        //                n.DescendentImpPreCalculated[1] = rImp;
        //            } else {
        //                imp = (double)(nodeLfItemCount) / instanceCount * lImp + (double)nodeRtItemCount / instanceCount * rImp;
        //                if (imp < minImp) {
        //                    minImp = imp;
        //                    bestPartition = partLst[(int)i];
        //                    n.DescendentImpPreCalculated[0] = lImp;
        //                    n.DescendentImpPreCalculated[1] = rImp;
        //                }
        //            }
        //        }
        //    }
        //    if (bestPartition == 0) {
        //        p.SplitStatus = Predictor.SplitStatusEnum.NotEnoughCases;
        //        p.Gain = 0;
        //        return 0;
        //    }
        //    //Set the possible children
        //    ValueGroup valueGroup = new ValueGroup(2);
        //    p.ChildrenGroups = valueGroup;
        //    pos = 0;
        //    binStr = Fcn.Decimal2BinaryStr((double)bestPartition);
        //    for (c = 0; c < p.ValueSd.Count; ++c) {
        //        //                if ((bestPartition & c) == c)
        //        if (binStr[binStr.Length - 1 - c] == '1')
        //            valueGroup.ValueGroupLst[0].Add(pos); //if the 'case' is in put it on the left
        //        else
        //            valueGroup.ValueGroupLst[1].Add(pos);//else, in the right side
        //        ++pos;
        //    }
        //    n.ImpBestUniSplit = minImp;
        //    p.Gain = (n.Imp - minImp) * 100 / n.Imp;
        //    p.Gain *= (double)(n.Table.RowCount - p.NullCount) / n.Table.RowCount;
        //    return p.Gain;
        //}
        //Sets the best p.Gain, valueGroup.ValueGroupLst[0] (left child) and valueGroup.ValueGroupLst[1] (right child)
        /// <summary>
        /// Tries every two-way grouping of the predictor's distinct values and keeps the grouping whose
        /// two sides have the lowest case-weighted impurity, as returned by ImpCat.
        /// On success the winning grouping is stored in p.ChildrenGroups (group 0 = left child,
        /// group 1 = right child) and p.ImpUniMin, p.Gain and n.DescendentImpPreCalculated are updated.
        /// </summary>
        /// <param name="n">Node being split; its rows are read through the table Def.DbTrTb + n.Id.</param>
        /// <param name="p">Categorical predictor to evaluate; SplitStatus, Gain, ImpUniMin and ChildrenGroups are written.</param>
        /// <returns>
        /// p.Gain, or 0 when the predictor has more than Def.ClfMaxNumberOfValuesForFullSearch values or
        /// no grouping leaves at least Def.TreeMinNumberOfCasesPerNode cases on each side.
        /// </returns>
        public static double MinImpCatFullSearch(NodeTargetCategorical n, Predictor p)
        {
            int distinctValues = p.DistinctValuesCount;
            if (distinctValues > Def.ClfMaxNumberOfValuesForFullSearch) {
                p.SplitStatus = Predictor.SplitStatusEnum.TooManyValuesToSearch;
                p.Gain = 0;
                return 0;
            }

            p.SplitStatus = Predictor.SplitStatusEnum.CanBeUsed;
            n.DescendentImpPreCalculated = new List<double>(2);
            n.DescendentImpPreCalculated.Add(0);
            n.DescendentImpPreCalculated.Add(0);

            List<string> leftValues = new List<string>(distinctValues);
            List<string> rightValues = new List<string>(distinctValues);
            SortedList<string, int> leftClassCounts = new SortedList<string, int>();
            SortedList<string, int> rightClassCounts = new SortedList<string, int>();

            // One row per (target value, predictor value) pair found in this node, with its row count.
            // Rows whose predictor value is NULL are left out.
            string predictorCol = Def.DbBsTb + "." + p.Variable.Name;
            string targetCol = Def.DbBsTb + "." + Def.Schema.Target.Name;
            string nodeTable = string.Concat(Def.DbTrTb, n.Id);
            string sql =
                "SELECT ALL  count(*), " + predictorCol + ",  " + targetCol + " " +
                "FROM " + Def.DbBsTb + "," + nodeTable + " " +
                "WHERE " + Def.DbBsTb + "." + Def.DbTableIdName + " = " + nodeTable + "." + Def.DbTableIdName +
                " AND " + predictorCol + " IS NOT NULL " +
                "GROUP BY " + targetCol + ", " + predictorCol + " ";
            List<NTT> nttLst = Def.Db.GetNTTLst(sql);

            int instanceCount = n.Table.RowCount;

            // Partitions are numbered 1 .. 2^(k-1) - 1. Bit j of the number set means value j goes left.
            // The highest value's bit is never set, so that value always stays on the right.
            int partitionCount = (int)(Math.Pow(2, distinctValues - 1) - 1);
            uint bestPartition = 0;
            double minImp = Double.NaN;
            uint bit;
            int valueIndex;

            for (uint partition = 1; partition <= partitionCount; ++partition) {
                leftValues.Clear();
                rightValues.Clear();
                leftClassCounts.Clear();
                rightClassCounts.Clear();
                int leftCases = 0, rightCases = 0;

                for (bit = 1, valueIndex = 0; bit <= partitionCount + 1; bit *= 2, ++valueIndex) {
                    List<string> side = (partition & bit) == bit ? leftValues : rightValues;
                    side.Add(p.ValueSd.Keys[valueIndex]);
                }

                foreach (NTT row in nttLst) {
                    if (leftValues.Contains(row.T0)) {
                        int cases = (int)row.N;
                        AddClassCases(leftClassCounts, row.T1, cases);
                        leftCases += cases;
                    }
                    if (rightValues.Contains(row.T0)) {
                        int cases = (int)row.N;
                        AddClassCases(rightClassCounts, row.T1, cases);
                        rightCases += cases;
                    }
                }

                // Groupings that leave either side too small are not candidates.
                if (leftCases < Def.TreeMinNumberOfCasesPerNode || rightCases < Def.TreeMinNumberOfCasesPerNode) {
                    continue;
                }

                double lImp = ImpCat(leftClassCounts, leftCases);
                double rImp = ImpCat(rightClassCounts, rightCases);
                double weightedImp = (double)(leftCases) / instanceCount * lImp + (double)rightCases / instanceCount * rImp;

                // The first admissible grouping is always taken; later ones only when strictly better.
                if (Double.IsNaN(minImp) || weightedImp < minImp) {
                    minImp = weightedImp;
                    bestPartition = partition;
                    n.DescendentImpPreCalculated[0] = lImp;
                    n.DescendentImpPreCalculated[1] = rImp;
                }
            }

            if (bestPartition == 0) {
                p.SplitStatus = Predictor.SplitStatusEnum.NotEnoughCases;
                p.Gain = 0;
                return 0;
            }

            // Decode the winning partition number back into the two value groups.
            ValueGroup valueGroup = new ValueGroup(p, 2);
            p.ChildrenGroups = valueGroup;
            for (bit = 1, valueIndex = 0; bit <= partitionCount + 1; bit *= 2, ++valueIndex) {
                if ((bestPartition & bit) == bit) {
                    valueGroup.AddValueFromIndex(valueIndex, 0); // left child
                } else {
                    valueGroup.AddValueFromIndex(valueIndex, 1); // right child
                }
            }

            p.ImpUniMin = minImp;
            // Relative impurity reduction in percent, scaled by the share of rows with a non-null predictor.
            p.Gain = (n.Imp - minImp) * 100 / n.Imp;
            p.Gain *= (double)(n.Table.RowCount - p.NullCount) / n.Table.RowCount;
            return p.Gain;
        }

        // Adds 'cases' to the running count kept for 'targetClass', creating the entry on first sight.
        private static void AddClassCases(SortedList<string, int> classCounts, string targetClass, int cases)
        {
            int soFar;
            if (classCounts.TryGetValue(targetClass, out soFar)) {
                classCounts[targetClass] = soFar + cases;
            } else {
                classCounts.Add(targetClass, cases);
            }
        }
 /// <summary>
 /// Runs InitializeComponent and then keeps the supplied node and predictor in the
 /// node and pred members.
 /// </summary>
 /// <param name="_node">Node stored in the node member.</param>
 /// <param name="_pred">Predictor stored in the pred member.</param>
 public FrmContinuousSplitProperties(Node _node, Predictor _pred)
 {
     InitializeComponent();

     pred = _pred;
     node = _node;
 }
Ejemplo n.º 8
0
        //Sets the best p.SplitValue p.Gain
        //        public static double MinImpCont(NodeTargetCategorical n, Predictor p) {
        //            int i, leftRowCount = 0, rightRowCount = 0;
        //            //            int dfd; //delete
        //            double minImp, info, lImp, rImp, instanceCount = n.Table.RowCount;
        //            lImp = rImp = minImp = info = double.NaN;
        //            List<N3T> AvcLst;
        //            List<int> thresholdIndexLst;
        //            //Tries each partition:
        //            //AvcLst[i].N0 = Value of the dependent variable
        //            //AvcLst[i].N1 = Frequency of y
        //            //AvcLst[i].N2 = Total of distinct registries until that row
        //            //AvcLst[i].T = y
        //            string sql =
        //            @"SELECT ALL " +
        //                Def.DbBsTb + "." + p.Variable.Name + ", " +
        //                " count(" + Def.DbBsTb + "." + p.Variable.Name + "),0 , " +
        //                Def.DbBsTb + "." + Def.Schema.Target.Name + " " +
        //            "FROM "
        //                + Def.DbBsTb + "," + Def.DbTrTb + n.Id + " " +
        //            "WHERE "
        //                + Def.DbBsTb + "." + Def.DbTableIdName + " = " +
        //                Def.DbTrTb + n.Id + "." + Def.DbTableIdName + " " +
        //                " AND " + Def.DbBsTb + "." + p.Variable.Name + " IS NOT NULL " +
        //            "GROUP BY " +
        //                Def.DbBsTb + "." + Def.Schema.Target.Name + ", " +
        //                Def.DbBsTb + "." + p.Variable.Name + " " +
        //            "ORDER BY " +
        //                Def.DbBsTb + "." + p.Variable.Name;
        //            AvcLst = Def.Db.GetN3TLst(sql);
        //            //N2 is the number of registries until a given row
        //            if (AvcLst.Count > 0) {
        //                AvcLst[0].N2 = AvcLst[0].N1;
        //                for (i = 1; i < AvcLst.Count; ++i)
        //                    AvcLst[i].N2 = AvcLst[i - 1].N2 + AvcLst[i].N1;
        //            }
        //            thresholdIndexLst = Fcn.SetPossibleThresholdIndexLst(AvcLst);
        //            if (thresholdIndexLst.Count == 0) {
        //                p.SplitStatus = Predictor.SplitStatusEnum.NotEnoughCases;
        //                p.Gain = 0;
        //                return 0;
        //            }
        //            n.DescendentImpPreCalculated = new List<double>(2);
        //            n.DescendentImpPreCalculated.Add(0);
        //            n.DescendentImpPreCalculated.Add(0);
        //            p.SplitStatus = Predictor.SplitStatusEnum.CanBeUsed;
        //            if (AvcLst.Count == 0) {
        //                p.SplitStatus = Predictor.SplitStatusEnum.NotEnoughCases;
        //                p.Gain = 0;
        //                return 0;
        //            }
        //            for (i = 0; i < thresholdIndexLst.Count; ++i) {
        //                lImp = ImpCont(0, thresholdIndexLst[i], AvcLst, out leftRowCount);
        //                rImp = ImpCont(thresholdIndexLst[i] + 1, AvcLst.Count - 1, AvcLst, out rightRowCount);
        //                if (Double.IsNaN(minImp) && leftRowCount >= Def.TreeMinNumberOfCasesPerNode && rightRowCount >= Def.TreeMinNumberOfCasesPerNode) {
        //                    info = (leftRowCount * lImp + rightRowCount * rImp) / (leftRowCount + rightRowCount);
        //                    if (Double.IsNaN(info) == false) {
        //                        minImp = info;
        //                        p.SplitValue = AvcLst[thresholdIndexLst[i]].N0;
        //                        n.DescendentImpPreCalculated[0] = lImp;
        //                        n.DescendentImpPreCalculated[1] = rImp;
        //                    }
        //                } else {
        //                    info = (leftRowCount * lImp + rightRowCount * rImp) / (leftRowCount + rightRowCount);
        //                    if (info < minImp && !Double.IsNaN(info) && leftRowCount >= Def.TreeMinNumberOfCasesPerNode && rightRowCount >= Def.TreeMinNumberOfCasesPerNode) {
        //                        minImp = info;
        //                        p.SplitValue = AvcLst[thresholdIndexLst[i]].N0;
        //                        n.DescendentImpPreCalculated[0] = lImp;
        //                        n.DescendentImpPreCalculated[1] = rImp;
        //                    }
        //                }
        //            }
        //            if (Double.IsNaN(minImp)) {
        //                p.SplitStatus = Predictor.SplitStatusEnum.NotEnoughCases;
        //                p.Gain = 0;
        //                return 0;
        //            }
        //            List<double> nLst;
        //            sql = @"
        //            SELECT
        //                DISTINCT " + p.Variable.Name + " " +
        //            "FROM "
        //                + Def.DbBsTb + " , " + Def.DbTrTb + n.Id + " " +
        //            "WHERE "
        //                + Def.DbBsTb + "." + Def.DbTableIdName + "=" +
        //                Def.DbTrTb + n.Id + "." + Def.DbTableIdName + " AND " +
        //                p.Variable.Name + " IS NOT NULL " +
        //            "ORDER BY "
        //                + p.Variable.Name + " ASC ";
        //            nLst = Def.Db.GetNumberLst(sql);
        //            //Finds the angle bisector of the split
        //            if (p.SplitValue != nLst[0] && p.SplitValue != nLst[nLst.Count - 2]) {
        //                for (i = 1; i < nLst.Count - 2; ++i) {
        //                    if (p.SplitValue == nLst[i]) {
        //                        p.SplitValue = (nLst[i - 1] + nLst[i + 1]) / 2;
        //                        break;
        //                    }
        //                }
        //            }
        //            p.Gain = (n.Imp - minImp) * 100 / n.Imp;
        //            p.Gain *= (double)(n.Table.RowCount - p.NullCount) / n.Table.RowCount;
        //            return p.Gain;
        //        }
        /// <summary>
        /// Sampled counterpart of the exhaustive categorical search: draws at most 4095 candidate
        /// groupings through RNG.GetUniform, scores each by the case-weighted impurity of its two sides
        /// (ImpCat) and keeps the best one in p.ChildrenGroups (group 0 = left, group 1 = right).
        /// Also writes p.SplitStatus, p.Gain and n.DescendentImpPreCalculated.
        /// </summary>
        /// <param name="n">Node being split; its rows are read through the table Def.DbTrTb + n.Id.</param>
        /// <param name="p">Categorical predictor to evaluate.</param>
        /// <returns>
        /// p.Gain, or 0 when no sampled grouping leaves at least Def.TreeMinNumberOfCasesPerNode
        /// cases on each side.
        /// </returns>
        public static double Killed_MinRandomImp(NodeTargetCategorical n, Predictor p)
        {
            int distinctValues = p.DistinctValuesCount;

            p.SplitStatus = Predictor.SplitStatusEnum.CanBeUsed;
            n.DescendentImpPreCalculated = new List<double>(2);
            n.DescendentImpPreCalculated.Add(0);
            n.DescendentImpPreCalculated.Add(0);

            List<string> leftValues = new List<string>(distinctValues);
            List<string> rightValues = new List<string>(distinctValues);
            SortedList<string, int> leftClassCounts = new SortedList<string, int>();
            SortedList<string, int> rightClassCounts = new SortedList<string, int>();

            // One row per (target value, predictor value) pair found in this node, with its row count.
            // Rows whose predictor value is NULL are left out.
            string predictorCol = Def.DbBsTb + "." + p.Variable.Name;
            string targetCol = Def.DbBsTb + "." + Def.Schema.Target.Name;
            string nodeTable = string.Concat(Def.DbTrTb, n.Id);
            string sql =
                "SELECT ALL  count(*), " + predictorCol + ",  " + targetCol + " " +
                "FROM " + Def.DbBsTb + "," + nodeTable + " " +
                "WHERE " + Def.DbBsTb + "." + Def.DbTableIdName + " = " + nodeTable + "." + Def.DbTableIdName +
                " AND " + predictorCol + " IS NOT NULL " +
                "GROUP BY " + targetCol + ", " + predictorCol + " ";
            List<NTT> nttLst = Def.Db.GetNTTLst(sql);

            int instanceCount = n.Table.RowCount;

            // 2^(k-1) - 1 groupings exist for k values; the number actually sampled is capped at 4095.
            double partitionCount = Math.Pow(2, distinctValues - 1) - 1;
            double sampleSize = partitionCount > 4095 ? 4095 : partitionCount;

            //CHECK
            // The lower bound passed to RNG.GetUniform rises by one with every draw (1, 2, 3, ...).
            List<double> candidates = new List<double>((int)sampleSize);
            for (int draw = 0; draw < sampleSize; ++draw) {
                candidates.Add((int)RNG.GetUniform(draw + 1, partitionCount));
            }

            double bestPartition = 0;
            double minImp = Double.NaN;
            int valueIndex;

            foreach (double candidate in candidates) {
                // NOTE(review): the indexing below needs at least p.ValueSd.Count characters in the
                // string returned by Fcn.Decimal2BinaryStr - confirm that it pads its result.
                string bits = Fcn.Decimal2BinaryStr(candidate);
                leftValues.Clear();
                rightValues.Clear();
                leftClassCounts.Clear();
                rightClassCounts.Clear();
                int leftCases = 0, rightCases = 0;

                // Character j counted from the end of the string decides the side of value j.
                for (valueIndex = 0; valueIndex < p.ValueSd.Count; ++valueIndex) {
                    List<string> side = bits[bits.Length - 1 - valueIndex] == '1' ? leftValues : rightValues;
                    side.Add(p.ValueSd.Keys[valueIndex]);
                }

                foreach (NTT row in nttLst) {
                    int soFar;
                    if (leftValues.Contains(row.T0)) {
                        int cases = (int)row.N;
                        if (leftClassCounts.TryGetValue(row.T1, out soFar)) {
                            leftClassCounts[row.T1] = soFar + cases;
                        } else {
                            leftClassCounts.Add(row.T1, cases);
                        }
                        leftCases += cases;
                    }
                    if (rightValues.Contains(row.T0)) {
                        int cases = (int)row.N;
                        if (rightClassCounts.TryGetValue(row.T1, out soFar)) {
                            rightClassCounts[row.T1] = soFar + cases;
                        } else {
                            rightClassCounts.Add(row.T1, cases);
                        }
                        rightCases += cases;
                    }
                }

                // Groupings that leave either side too small are not candidates.
                if (leftCases < Def.TreeMinNumberOfCasesPerNode || rightCases < Def.TreeMinNumberOfCasesPerNode) {
                    continue;
                }

                double lImp = ImpCat(leftClassCounts, leftCases);
                double rImp = ImpCat(rightClassCounts, rightCases);
                double weightedImp = (double)(leftCases) / instanceCount * lImp + (double)rightCases / instanceCount * rImp;

                // The first admissible grouping is always taken; later ones only when strictly better.
                if (Double.IsNaN(minImp) || weightedImp < minImp) {
                    minImp = weightedImp;
                    bestPartition = candidate;
                    n.DescendentImpPreCalculated[0] = lImp;
                    n.DescendentImpPreCalculated[1] = rImp;
                }
            }

            if (bestPartition == 0) {
                p.SplitStatus = Predictor.SplitStatusEnum.NotEnoughCases;
                p.Gain = 0;
                return 0;
            }

            // Decode the winning candidate back into the two value groups.
            ValueGroup valueGroup = new ValueGroup(p, 2);
            p.ChildrenGroups = valueGroup;

            string bestBits = Fcn.Decimal2BinaryStr((double)bestPartition);
            for (valueIndex = 0; valueIndex < p.ValueSd.Count; ++valueIndex) {
                if (bestBits[bestBits.Length - 1 - valueIndex] == '1') {
                    valueGroup.AddValueFromIndex(valueIndex, 0); // left child
                } else {
                    valueGroup.AddValueFromIndex(valueIndex, 1); // right child
                }
            }

            // Relative impurity reduction in percent, scaled by the share of rows with a non-null predictor.
            p.Gain = (n.Imp - minImp) * 100 / n.Imp;
            p.Gain *= (double)(n.Table.RowCount - p.NullCount) / n.Table.RowCount;
            return p.Gain;
        }
Ejemplo n.º 9
0
        /// <summary>
        /// Queries variance(target) and count(*) over the rows of the node's table whose predictor
        /// value equals one of <paramref name="vals"/>, and returns their product.
        /// </summary>
        /// <param name="vals">Predictor values selecting the rows; compared as quoted SQL string literals.</param>
        /// <param name="node">Node whose table (Def.DbTrTb + node.Id) restricts the rows.</param>
        /// <param name="pred">Predictor whose column (pred.Variable.Name) is filtered.</param>
        /// <returns>Variance times row count; 0 when <paramref name="vals"/> is empty.</returns>
        public static double VarianceTimesCount(List<string> vals, Node node, Predictor pred)
        {
            // No values selects no rows, so the count (and the product) is 0. Returning here also
            // avoids sending a statement that ends in "and (" with unbalanced parentheses.
            if (vals.Count == 0) {
                return 0;
            }

            string[] conditions = new string[vals.Count];
            for (int i = 0; i < vals.Count; ++i) {
                // The database helper only accepts a finished SQL string, so the value cannot be bound
                // as a parameter. Doubling embedded single quotes keeps a value such as O'Brien inside
                // its string literal instead of ending it early.
                string literal = (vals[i] ?? "").Replace("'", "''");
                conditions[i] = pred.Variable.Name + "='" + literal + "' ";
            }
            string valSql = string.Join(" or ", conditions) + ")";

            string sql =
            @"SELECT " +
                "COALESCE(variance(" + Def.Tree.Schema.Target.Name + "), 0), count(*) " +
            "FROM "
               + Def.DbBsTb + " , " + Def.DbTrTb + node.Id + " " +
            "WHERE ("
                + Def.DbBsTb + "." + Def.DbTableIdName + "=" +
                Def.DbTrTb + node.Id + "." + Def.DbTableIdName + ") and (" +
                valSql;

            // First column is the (COALESCEd) variance, second the row count.
            List<double> NLst = Def.Db.GetNumberRowLst(sql);
            return NLst[0] * NLst[1];
        }
Ejemplo n.º 10
0
        //============================================================================================================
        //============================================================================================================
        /// <summary>
        /// Scores a two-way grouping of a categorical predictor: rows whose category (T) is listed in
        /// <paramref name="leftLst"/> go left, all others go right. Returns the row-weighted mean of
        /// the two sides' sample variances of N0, with every row counted N1 times.
        /// </summary>
        /// <param name="leftLst">Categories assigned to the left side.</param>
        /// <param name="dataLst">Rows read as N0 = value, N1 = frequency, T = category.</param>
        /// <param name="node">Not used by this method.</param>
        /// <param name="pred">Not used by this method.</param>
        /// <returns>
        /// The weighted variance, or Double.NaN when either side holds fewer than
        /// Def.TreeMinNumberOfCasesPerNode rows.
        /// </returns>
        private static double PartitionInfo(List<string> leftLst, List<NNT> dataLst, Node node, Predictor pred)
        {
            double lSum = 0, lSumSq = 0, lRows = 0;
            double rSum = 0, rSumSq = 0, rRows = 0;

            // Running sum, sum of squares and row count for each side.
            foreach (NNT row in dataLst) {
                double value = row.N0;
                double weight = row.N1;
                string category = row.T;

                if (leftLst.Contains(category)) {
                    lSum += value * weight;
                    lSumSq += value * value * weight;
                    lRows += weight;
                } else {
                    rSum += value * weight;
                    rSumSq += value * value * weight;
                    rRows += weight;
                }
            }

            if (lRows < Def.TreeMinNumberOfCasesPerNode || rRows < Def.TreeMinNumberOfCasesPerNode) {
                return double.NaN;
            }

            // Sample variance from the running sums: (n * sum(x^2) - sum(x)^2) / (n * (n - 1)).
            double lVariance = ((lSumSq * lRows) - (lSum * lSum)) / (lRows * (lRows - 1));
            double rVariance = ((rSumSq * rRows) - (rSum * rSum)) / (rRows * (rRows - 1));

            return ((lVariance * lRows) + (rVariance * rRows)) / (lRows + rRows);
        }
Ejemplo n.º 11
0
        //============================================================================================================
        //============================================================================================================
        //Sets the best p.SplitValue p.Gain
        //public static double MinInfoCatHeuristicSLOW(NodeTargetContinuous n, Predictor p) {
        //    int i, bestPartitionSplitPoint = 0;
        //    //            int dfd; //delete
        //    double minVar = 0, var, lVar, rVar, instanceCount = n.Table.RowCount;
        //    lVar = rVar = minVar = var = 0;
        //    List<NNT> nntLst;
        //    List<string> left = new List<string>();
        //    List<string> right = new List<string>();
        //    List<string> leftBest = new List<string>();
        //    List<string> rightBest = new List<string>();
        //    //Tries each partition:
        //    //AvsLst[i].N0 = y
        //    //AvsLst[i].N1 = Value of the dependent varible
        //    //AvsLst[i].N2 = Frequency of y
        //    //AvsLst[i].N3 = Total of distinct registries until that row
        //    string sql =
        //    @"SELECT count(*), " +
        //           "0, " +
        //           Def.DbBsTb + "." + p.Variable.Name + " " +
        //    "FROM "
        //        + Def.DbBsTb + "," + Def.DbTrTb + n.Id + " " +
        //    "WHERE "
        //        + Def.DbBsTb + "." + Def.DbTableIdName + " = " +
        //        Def.DbTrTb + n.Id + "." + Def.DbTableIdName + " " +
        //    "GROUP BY " +
        //        Def.DbBsTb + "." + p.Variable.Name + " " +
        //    "ORDER BY avg("
        //        + Def.DbBsTb + "." + Def.Schema.Target.Name + ")";
        //    nntLst = Def.Db.GetNNTLst(sql);
        //    //N1 is the number of registries until a given row
        //    left.Add(nntLst[0].T);
        //    leftBest.Add(nntLst[0].T);
        //    if (nntLst.Count > 0) {
        //        nntLst[0].N1 = nntLst[0].N0;
        //        for (i = 1; i < nntLst.Count; ++i) {
        //            nntLst[i].N1 = nntLst[i - 1].N1 + nntLst[i].N0;
        //            right.Add(nntLst[i].T);
        //            rightBest.Add(nntLst[i].T);
        //        }
        //    }
        //    n.DescendentImpPreCalculated = new List<double>(2);
        //    n.DescendentImpPreCalculated.Add(0);
        //    n.DescendentImpPreCalculated.Add(0);
        //    p.SplitStatus = Predictor.SplitStatusEnum.CanBeUsed;
        //    if (nntLst.Count == 0) {
        //        p.SplitStatus = Predictor.SplitStatusEnum.NotEnoughCases;
        //        p.Gain = 0;
        //        return 0;
        //    }
        //    for (i = 0; i < nntLst.Count - 1; ++i) {
        //        //if(i==12 && p.Variable.Name=="apache_2")
        //        //    dfd=8;
        //        lVar = Fcn.InfoTimesCount(left, n, p);
        //        rVar = Fcn.InfoTimesCount(right, n, p);
        //        if (i == 0) {
        //            minVar = (lVar + rVar) / instanceCount;
        //            n.DescendentImpPreCalculated[0] = lVar;
        //            n.DescendentImpPreCalculated[1] = rVar;
        //        } else {
        //            var = (lVar + rVar) / instanceCount;
        //            if (var < minVar) {
        //                minVar = var;
        //                bestPartitionSplitPoint = i;
        //                n.DescendentImpPreCalculated[0] = lVar;
        //                n.DescendentImpPreCalculated[1] = rVar;
        //                leftBest.Clear();
        //                rightBest.Clear();
        //                foreach (string s in left)
        //                    leftBest.Add(s);
        //                foreach (string s in right)
        //                    rightBest.Add(s);
        //            }
        //        }
        //        left.Add(right[0]);
        //        right.RemoveAt(0);
        //    }
        //    p.ChildrenGroups.ValueLst[0].Clear();
        //    p.ChildrenGroups.ValueLst[1].Clear();
        //    //Add left node values
        //    for (i = 0; i <= bestPartitionSplitPoint; ++i)
        //        p.ChildrenGroups.ValueLst[0].Add(p.CaseSd.IndexOfKey(nntLst[i].T));
        //    //Add right node values
        //    for (i = bestPartitionSplitPoint + 1; i < nntLst.Count; ++i)
        //        p.ChildrenGroups.ValueLst[1].Add(p.CaseSd.IndexOfKey(nntLst[i].T));
        //    p.Gain = (n.Imp - minVar) * 100 / n.Imp;
        //    return p.Gain;
        //}
        ////============================================================================================================
        ////============================================================================================================
        //Sets the best p.SplitValue p.Gain
        /// <summary>
        /// Looks for the best threshold on a continuous predictor. Each candidate index returned by
        /// Fcn.SetPossibleThresholdIndexLst is scored by the row-weighted mean of Info over the rows up
        /// to the index and Info over the rows after it; the lowest score wins.
        /// Writes p.SplitStatus, p.SplitValue, p.Gain and n.DescendentImpPreCalculated.
        /// </summary>
        /// <param name="n">Node being split; its rows are read through the table Def.DbTrTb + n.Id.</param>
        /// <param name="p">Continuous predictor to evaluate.</param>
        /// <returns>
        /// p.Gain, or 0 (with SplitStatus = NotEnoughCases) when the query returns no rows, no candidate
        /// threshold exists, or no candidate leaves at least Def.TreeMinNumberOfCasesPerNode rows on
        /// each side with a computable score.
        /// </returns>
        public static double MinInfoCont(NodeTargetContinuous n, Predictor p)
        {
            int i, leftRowCount = 0, rightRowCount = 0;
            double minVar = double.NaN, splitVar, lVar, rVar;
            List<N4> AvsLst;
            List<int> thresholdIndexLst;

            // Columns selected, in order: target value, predictor value, row count, literal 0.
            // The code below reads them as N0, N1, N2 and N3 respectively.
            // NOTE(review): presumably GetN4Lst maps the columns positionally - confirm.
            // Rows are ordered by predictor value; NULL predictor values are excluded.
            string sql =
            @"SELECT ALL "
                + Def.DbBsTb + "." + Def.Schema.Target.Name + ", " +
                Def.DbBsTb + "." + p.Variable.Name + ", " +
                " count(" + Def.DbBsTb + "." + p.Variable.Name + "), 0 " +
            "FROM "
                + Def.DbBsTb + "," + Def.DbTrTb + n.Id + " " +
            "WHERE "
                + Def.DbBsTb + "." + Def.DbTableIdName + " = " +
                Def.DbTrTb + n.Id + "." + Def.DbTableIdName + " " +
                " AND " + Def.DbBsTb + "." + p.Variable.Name + " IS NOT NULL " +
            "GROUP BY " +
                Def.DbBsTb + "." + Def.Schema.Target.Name + ", " +
                Def.DbBsTb + "." + p.Variable.Name + " " +
            "ORDER BY " +
                Def.DbBsTb + "." + p.Variable.Name;
            AvsLst = Def.Db.GetN4Lst(sql);

            // Nothing to split on. Checked before the list is handed to any helper so that an empty
            // result never reaches the threshold search.
            if (AvsLst.Count == 0) {
                p.SplitStatus = Predictor.SplitStatusEnum.NotEnoughCases;
                p.Gain = 0;
                return 0;
            }

            // Turn N3 into the running total of N2 (rows seen up to and including each entry).
            AvsLst[0].N3 = AvsLst[0].N2;
            for (i = 1; i < AvsLst.Count; ++i)
                AvsLst[i].N3 = AvsLst[i - 1].N3 + AvsLst[i].N2;

            thresholdIndexLst = Fcn.SetPossibleThresholdIndexLst(AvsLst);
            if (thresholdIndexLst.Count == 0) {
                p.SplitStatus = Predictor.SplitStatusEnum.NotEnoughCases;
                p.Gain = 0;
                return 0;
            }

            n.DescendentImpPreCalculated = new List<double>(2);
            n.DescendentImpPreCalculated.Add(0);
            n.DescendentImpPreCalculated.Add(0);

            p.SplitStatus = Predictor.SplitStatusEnum.CanBeUsed;

            for (i = 0; i < thresholdIndexLst.Count; ++i) {
                int cut = thresholdIndexLst[i];
                lVar = Info(0, cut, AvsLst, out leftRowCount);
                rVar = Info(cut + 1, AvsLst.Count - 1, AvsLst, out rightRowCount);

                // A threshold that leaves either side too small is not a candidate.
                if (leftRowCount < Def.TreeMinNumberOfCasesPerNode || rightRowCount < Def.TreeMinNumberOfCasesPerNode)
                    continue;

                splitVar = (leftRowCount * lVar + rightRowCount * rVar) / (leftRowCount + rightRowCount);
                if (Double.IsNaN(splitVar))
                    continue;

                // The first usable candidate is always taken; later ones only when strictly better.
                if (Double.IsNaN(minVar) || splitVar < minVar) {
                    minVar = splitVar;
                    p.SplitValue = AvsLst[cut].N1;
                    n.DescendentImpPreCalculated[0] = lVar;
                    n.DescendentImpPreCalculated[1] = rVar;
                }
            }
            if (Double.IsNaN(minVar)) {
                p.SplitStatus = Predictor.SplitStatusEnum.NotEnoughCases;
                p.Gain = 0;
                return 0;
            }

            // Relative reduction in percent, scaled by the share of rows with a non-null predictor.
            p.Gain = (n.Imp - minVar) * 100 / n.Imp;
            p.Gain *= (double)(n.Table.RowCount - p.NullCount) / n.Table.RowCount;
            return p.Gain;
        }
Ejemplo n.º 12
0
        //Sets the best p.SplitValue p.Gain
        //public static double MinInfoContSQL(NodeTargetContinuous n, Predictor p) {
        //    int i;
        //    double l, r, minVar, var;
        //    l = r = minVar = var = 0;
        //    //Tries each partition:
        //    //p.Gain = (n.Imp - minVar) * 100 / n.Imp;
        //    List<double> vlLst;
        //    List<double> NLst;
        //    OdbcTransaction dbTrans = null;
        //    dbTrans = Def.Db.Con.BeginTransaction();
        //    string sql =
        //    @"SELECT DISTINCT " +
        //        Def.DbBsTb + "." + p.Variable.Name + " " +
        //    "FROM " +
        //        Def.DbBsTb + "," + Def.DbTrTb + n.Id + " " +
        //    "WHERE " +
        //        Def.DbBsTb + "." + Def.DbTableIdName + " = " +
        //        Def.DbTrTb + n.Id + "." + Def.DbTableIdName + " " +
        //    "ORDER BY " +
        //        Def.DbBsTb + "." + p.Variable.Name;
        //    vlLst = Def.Db.GetNumberLst(sql, dbTrans);
        //    p.SplitStatus = Predictor.SplitStatusEnum.CanBeUsed;
        //    if (vlLst.Count == 0) {
        //        p.SplitStatus = Predictor.SplitStatusEnum.NotEnoughCases;
        //        p.Gain = 0;
        //        return 0;
        //    }
        //    for (i = 0; i < vlLst.Count; ++i) {
        //        sql =
        //       @"SELECT
        //            COALESCE(variance(" + n.Tree.Schema.Target.Name + "), 0), count(*) " +
        //        "FROM "
        //        + Def.DbBsTb + " , " + Def.DbTrTb + n.Id + " " +
        //        "WHERE "
        //            + Def.DbBsTb + "." + Def.DbTableIdName + "=" +
        //            Def.DbTrTb + n.Id + "." + Def.DbTableIdName + " and " +
        //            Def.DbBsTb + "." + p.Variable.Name + "<=" + vlLst[i];
        //        NLst = Def.Db.GetNumberRowLst(sql, dbTrans);
        //        l = NLst[0] * NLst[1] / n.Table.RowCount;
        //        sql =
        //       @"SELECT
        //            COALESCE(variance(" + n.Tree.Schema.Target.Name + "), 0), count(*) " +
        //        "FROM "
        //        + Def.DbBsTb + " , " + Def.DbTrTb + n.Id + " " +
        //        "WHERE "
        //            + Def.DbBsTb + "." + Def.DbTableIdName + "=" +
        //            Def.DbTrTb + n.Id + "." + Def.DbTableIdName + " and " +
        //            Def.DbBsTb + "." + p.Variable.Name + ">" + vlLst[i];
        //        NLst = Def.Db.GetNumberRowLst(sql, dbTrans);
        //        r = NLst[0] * NLst[1] / n.Table.RowCount;
        //        var = (l + r);
        //        if (i == 0) {
        //            minVar = var;
        //            p.SplitValue = vlLst[i];
        //        } else {
        //            if (var < minVar) {
        //                minVar = var;
        //                p.SplitValue = vlLst[i];
        //            }
        //        }
        //    }
        //    dbTrans.Commit();
        //    p.Gain = (n.Imp - minVar) * 100 / n.Imp; ;
        //    return p.Gain;
        //}
        //============================================================================================================
        //============================================================================================================
        //Sets the best left/right value partition (p.ChildrenGroups.ValueGroupLst) and p.Gain
        //Used to be:        public static double MinInfoCatHeuristic(NodeTargetContinuous n, Predictor p) {
        /// <summary>
        /// Looks for the best two-way split of a categorical predictor at a node with a continuous target.
        /// The predictor's non-null values are read from the database ordered by the average target
        /// value; each leading run of that ordering is then scored as the "left" group with
        /// PartitionInfo, and the run with the lowest non-NaN score is kept.
        /// </summary>
        /// <param name="n">Node being split. Its Id selects the node table in the queries; Imp and Table.RowCount scale the gain.</param>
        /// <param name="p">
        /// Predictor to evaluate. SplitStatus is always written. On success
        /// ChildrenGroups.ValueGroupLst[0] and [1] are refilled with the left and right values
        /// and Gain is set. ChildrenGroups must already be allocated by the caller.
        /// </param>
        /// <returns>The value stored in p.Gain, or 0 when no usable split was found.</returns>
        public static double MinInfoCat(NodeTargetContinuous n, Predictor p)
        {
            int i, bestPartitionSplitPoint = 0;
            double minVar = double.NaN, partitionInfo;
            List<string> valLst;
            List<NNT> DepIndepLst;
            List<string> left = new List<string>();

            // SQL fragments shared by both queries below; the resulting query text is unchanged.
            // NOTE(review): table/column names are concatenated into the SQL text (identifiers cannot
            // be bound as parameters); this assumes schema names are trusted - confirm.
            string predictorCol = Def.DbBsTb + "." + p.Variable.Name;
            string targetCol = Def.DbBsTb + "." + Def.Schema.Target.Name;
            string fromWhere =
                "FROM " + Def.DbBsTb + "," + Def.DbTrTb + n.Id + " " +
                "WHERE " + Def.DbBsTb + "." + Def.DbTableIdName + " = " +
                    Def.DbTrTb + n.Id + "." + Def.DbTableIdName + " " +
                    "AND " + predictorCol + " IS NOT NULL ";

            // Distinct non-null predictor values, ordered by the average target value of each one.
            string sqlAverage =
                "SELECT " + predictorCol + " " +
                fromWhere +
                "GROUP BY " + predictorCol + " " +
                "ORDER BY avg(" + targetCol + ")";
            valLst = Def.Db.GetTextLst(sqlAverage);

            if (valLst.Count == 0) {
                p.SplitStatus = Predictor.SplitStatusEnum.NotEnoughCases;
                p.Gain = 0;
                return 0;
            }

            // Row count for every (target value, predictor value) pair; handed to PartitionInfo.
            string sqlDepVar =
                "SELECT " + targetCol + ", " +
                "count(*), " +
                predictorCol + " " +
                fromWhere +
                "GROUP BY " + targetCol + ", " + predictorCol;
            DepIndepLst = Def.Db.GetNNTLst(sqlDepVar);

            p.SplitStatus = Predictor.SplitStatusEnum.CanBeUsed;

            // Grow the left group one value at a time and keep the split point with the lowest
            // non-NaN score. The last value is never tried on the left, so the right group is
            // never empty; a single distinct value therefore yields no candidate at all.
            left.Add(valLst[0]);
            for (i = 0; i < valLst.Count - 1; ++i) {
                partitionInfo = PartitionInfo(left, DepIndepLst, n, p);
                if (!Double.IsNaN(partitionInfo) && (Double.IsNaN(minVar) || partitionInfo < minVar)) {
                    minVar = partitionInfo;
                    bestPartitionSplitPoint = i;
                }
                left.Add(valLst[i + 1]);
            }
            if (Double.IsNaN(minVar)) {
                p.SplitStatus = Predictor.SplitStatusEnum.NotEnoughCases;
                p.Gain = 0;
                return 0;
            }

            p.ChildrenGroups.ValueGroupLst[0].Clear();
            p.ChildrenGroups.ValueGroupLst[1].Clear();

            // Values up to and including the best split point go to the left child...
            for (i = 0; i <= bestPartitionSplitPoint; ++i) {
                p.ChildrenGroups.ValueGroupLst[0].Add(valLst[i]);
            }
            // ...and the remaining ones to the right child.
            for (i = bestPartitionSplitPoint + 1; i < valLst.Count; ++i) {
                p.ChildrenGroups.ValueGroupLst[1].Add(valLst[i]);
            }

            // Relative reduction with respect to n.Imp, as a percentage, then scaled by the
            // fraction of rows whose predictor value is not null.
            // NOTE(review): if n.Imp is 0 this floating-point division yields NaN/Infinity rather
            // than throwing - confirm callers never evaluate splits on such a node.
            p.Gain = (n.Imp - minVar) * 100 / n.Imp;
            p.Gain *= (double)(n.Table.RowCount - p.NullCount) / n.Table.RowCount;

            return p.Gain;
        }
Ejemplo n.º 13
0
        /// <summary>
        /// Creates one Predictor per variable in Def.Schema.PredictorLst for the given node and
        /// loads its summary figures from the database (rows of the base table joined with the
        /// node's table on the id column).
        /// Categorical variables get their non-null values and counts in ValueSd; the other
        /// variables get their lower/higher bounds and DistinctValuesCount. Every predictor gets
        /// its NullCount and is appended to node.PredictorLst and to node.PredCatLst or
        /// node.PredNumLst respectively.
        /// </summary>
        /// <param name="node">Node whose predictor lists are filled; node.Id selects the node table.</param>
        /// <returns>
        /// true when every predictor was loaded; false when the connection is not open or a query
        /// failed (the error is reported through FE.Show). Predictors added before a failure
        /// remain in the node's lists.
        /// </returns>
        public bool PredictorsFill(Node node)
        {
            bool r = false;

            if (con.State != ConnectionState.Open)
                con.Open();
            if (con.State != ConnectionState.Open) {
                MessageBox.Show("Could not open the connection", "Error");
                // Nothing can be queried without a connection; previously execution fell through
                // and failed inside the try block.
                return r;
            }
            try {
                // NOTE(review): table/column names are concatenated into the SQL text (identifiers
                // cannot be bound as parameters); this assumes schema names are trusted - confirm.
                foreach (SchemaVariable predVar in Def.Schema.PredictorLst) {
                    Predictor pred = new Predictor(predVar, node, node.PredictorLst.Count);
                    bool isCategorical = predVar.VariableTypeUserSet == SchemaVariable.VariableTypeEnum.Categorical;
                    string qry;

                    // Each command/reader pair is disposed before the next query is issued, so no
                    // reader is left open on the connection and nothing is closed through a null
                    // reference when no query was executed.
                    if (isCategorical) {
                        // Every non-null value of the variable with its number of rows.
                        qry = @"SELECT "
                                    + predVar.Name + ", count(" + predVar.Name + ") " +
                              "FROM "
                                    + Def.DbBsTb + ", " + Def.DbTrTb + node.Id + " " +
                              "WHERE "
                                    + Def.DbBsTb + "." + Def.DbTableIdName + "=" +
                                    Def.DbTrTb + node.Id + "." + Def.DbTableIdName + " " +
                                    " and " + predVar.Name + " IS NOT NULL " +
                              "GROUP BY "
                                    + predVar.Name;
                        using (OdbcCommand cmd = new OdbcCommand(qry, con))
                        using (OdbcDataReader reader = cmd.ExecuteReader()) {
                            while (reader.Read()) {
                                pred.ValueSd.Add(Convert.ToString(reader[0]), Convert.ToInt32(reader[1]));
                            }
                        }
                    } else {
                        // TODO: the queries for this variable could be reduced to a single database
                        // pass by selecting all the aggregates at the same time.

                        // Minimum and maximum of the non-null values (0 when there are none).
                        qry =
                              @"SELECT
                                    COALESCE(MIN(" + predVar.Name + "),0),  COALESCE(MAX(" + predVar.Name + "), 0) " +
                              "FROM "
                                    + Def.DbBsTb + ", " + Def.DbTrTb + node.Id + " " +
                              "WHERE " + Def.DbBsTb + "." + Def.DbTableIdName + "="
                                       + Def.DbTrTb + node.Id + "." + Def.DbTableIdName + " and "
                                       + predVar.Name + " IS NOT NULL ";
                        using (OdbcCommand cmd = new OdbcCommand(qry, con))
                        using (OdbcDataReader reader = cmd.ExecuteReader()) {
                            reader.Read();
                            pred.SetLowerAndHigher(Convert.ToDouble(reader[0]), Convert.ToDouble(reader[1]));
                        }

                        // Number of distinct non-null values.
                        qry =
                            @"SELECT COUNT(DISTINCT " + predVar.Name + ")" +
                              "FROM "
                                    + Def.DbBsTb + ", " + Def.DbTrTb + node.Id + " " +
                              "WHERE " + Def.DbBsTb + "." + Def.DbTableIdName + "="
                                       + Def.DbTrTb + node.Id + "." + Def.DbTableIdName + " and "
                                       + predVar.Name + " IS NOT NULL";
                        using (OdbcCommand cmd = new OdbcCommand(qry, con))
                        using (OdbcDataReader reader = cmd.ExecuteReader()) {
                            reader.Read();
                            pred.DistinctValuesCount = Convert.ToInt32(reader[0]);
                        }
                    }

                    // Number of rows where the variable is NULL; same query for both variable kinds.
                    qry =
                        @"SELECT COUNT(*)" +
                          "FROM "
                                + Def.DbBsTb + ", " + Def.DbTrTb + node.Id + " " +
                          "where " + Def.DbBsTb + "." + Def.DbTableIdName + "="
                                   + Def.DbTrTb + node.Id + "." + Def.DbTableIdName + " "
                                   + " and " + predVar.Name + " IS NULL";
                    using (OdbcCommand cmd = new OdbcCommand(qry, con))
                    using (OdbcDataReader reader = cmd.ExecuteReader()) {
                        reader.Read();
                        pred.NullCount = Convert.ToInt32(reader[0]);
                    }

                    // Register the predictor only after all of its queries succeeded.
                    node.PredictorLst.Add(pred);
                    if (isCategorical) {
                        node.PredCatLst.Add(pred);
                    } else {
                        node.PredNumLst.Add(pred);
                    }
                }
                r = true;
            } catch (Exception ex) {
                FE.Show(ex.Message, "Error could not execute PredictorsFill(Node node)", ex.StackTrace);
            }
            return r;
        }