private void btNext_Click(object sender, EventArgs e)
 {
     // Handles the "Next" button: stores the impurity criterion chosen through the
     // radio buttons, retitles the main form, creates a fresh root node and closes
     // this dialog. When no radio button is checked the algorithm and the title are
     // left exactly as they were.
     // (A fourth "Hybrid" choice, Tree.AlgorithmEnum.Hybrid, used to be offered here
     // and is currently disabled.)
     string criterion = null;

     if (rbGini.Checked) {
         Def.Tree.Algorithm = Tree.AlgorithmEnum.Gini;
         criterion = "Gini index";
     } else if (rbEntropy.Checked) {
         Def.Tree.Algorithm = Tree.AlgorithmEnum.Entropy;
         criterion = "Information Entropy";
     } else if (rbMaxDif.Checked) {
         Def.Tree.Algorithm = Tree.AlgorithmEnum.MaxDif;
         criterion = "MaxDif";
     }

     if (criterion != null) {
         Def.FrmMain.Text = Def.APPLICATION_NAME + " using " + criterion + " on the " + Def.DbTableInUse + " data set";
     }

     // The concrete root type follows the type the user assigned to the target variable.
     if (Def.Schema.Target.VariableTypeUserSet == SchemaVariable.VariableTypeEnum.Categorical) {
         Def.Tree.Root = new NodeTargetCategorical();
     } else {
         Def.Tree.Root = new NodeTargetContinuous();
     }

     Def.Tree.GrowthState = Tree.GrowthStateEnum.Root;
     Def.TreeCanBeDisplayed = true;
     Def.FrmMain.TreeBuild();
     Def.ToolBar.Items["btNew"].Enabled = true;
     Close();
 }
 private void btNext_Click(object sender, EventArgs e)
 {
     // Handles the "Next" button of the variance-criterion dialog: stores the chosen
     // criterion, retitles the main form, creates a fresh root node and closes this
     // dialog. With neither radio button checked, algorithm and title stay untouched.
     string criterion = null;

     if (rbNetReductionInVariance.Checked) {
         Def.Tree.Algorithm = Tree.AlgorithmEnum.NetRiV;
         criterion = "Net Reduction in Variance";
     } else if (rbGrossReductionInVariance.Checked) {
         Def.Tree.Algorithm = Tree.AlgorithmEnum.GrossRiV;
         criterion = "Gross Reduction in Variance";
     }

     if (criterion != null) {
         Def.FrmMain.Text = Def.APPLICATION_NAME + " using " + criterion + " on the " + Def.DbTableInUse + " data set";
     }

     // The concrete root type follows the type the user assigned to the target variable.
     if (Def.Schema.Target.VariableTypeUserSet == SchemaVariable.VariableTypeEnum.Categorical) {
         Def.Tree.Root = new NodeTargetCategorical();
     } else {
         Def.Tree.Root = new NodeTargetContinuous();
     }

     Def.Tree.GrowthState = Tree.GrowthStateEnum.Root;
     Def.TreeCanBeDisplayed = true;
     Def.FrmMain.TreeBuild();
     Def.ToolBar.Items["btNew"].Enabled = true;
     Close();
 }
Ejemplo n.º 3
0
        /// <summary>
        /// Splits <paramref name="node"/> into a left and a right child using the split
        /// variable (and split value / value groups) already stored on the node.
        /// For each child a reference table named <c>Def.DbTrTb + id</c> is created that
        /// holds the ids of the parent's rows that satisfy the child's condition, then the
        /// child node object is constructed. <c>afterSplit</c> is always called at the end,
        /// even when the split variable is neither continuous nor categorical.
        /// </summary>
        /// <param name="node">Node to split; its SplitVariable must already be set.</param>
        public void ManuallySplit(Node node)
        {
            SchemaVariable.VariableTypeEnum splitType = node.SplitVariable.Variable.VariableTypeUserSet;
            string predictorName = node.SplitVariable.Variable.Name;

            if (splitType == SchemaVariable.VariableTypeEnum.Continuous) {
                // The threshold goes into SQL text, so it must be formatted with the
                // invariant culture: under a comma-decimal locale the default ToString()
                // would emit e.g. "3,5" and corrupt the statement.
                string threshold = node.SplitValue.ToString(System.Globalization.CultureInfo.InvariantCulture);

                // Left child: rows at or below the threshold.
                createSplitChildTable(node, predictorName + " <= " + threshold);
                createSplitChildNode(node, " <= " + Math.Round(node.SplitValue, 2));

                // Right child: rows above the threshold.
                createSplitChildTable(node, predictorName + " > " + threshold);
                createSplitChildNode(node, " > " + Math.Round(node.SplitValue, 2));
            } else if (splitType == SchemaVariable.VariableTypeEnum.Categorical) {
                // Left child: rows whose value belongs to the first value group.
                createSplitChildTable(node, buildCategoryFilter(predictorName, node.SplitVariable.ChildrenGroups.ValueGroupLst[0]));
                createSplitChildNode(node, null);

                // Right child: rows whose value belongs to the second value group.
                createSplitChildTable(node, buildCategoryFilter(predictorName, node.SplitVariable.ChildrenGroups.ValueGroupLst[1]));
                createSplitChildNode(node, null);
            }

            afterSplit(node);
        }

        // (Re)creates the reference table for the next child of `parent`. The table id is
        // Node.CountId + 1, read before the child node object exists; it receives the ids
        // of the parent's rows, joined to the base table, that satisfy `condition`.
        private static void createSplitChildTable(Node parent, string condition)
        {
            int childId = Node.CountId + 1;

            Def.Db.TableDropIfExists(Def.DbTrTb + childId);
            Def.Db.ReferenceTableIndexDropIfExists(childId);

            string sql =
                "CREATE TABLE " + Def.DbTrTb + childId +
                " AS " +
                "SELECT " + Def.DbTrTb + parent.Id + "." + Def.DbTableIdName +
                " FROM " + Def.DbTrTb + parent.Id + ", " + Def.DbBsTb +
                " WHERE (" +
                Def.DbTrTb + parent.Id + "." + Def.DbTableIdName + "=" + Def.DbBsTb + "." + Def.DbTableIdName +
                ") AND (" + condition + ")";
            Def.Db.ExecuteNonQuery(sql);
            Def.Db.ReferenceTableIndexCreate(childId);
        }

        // Constructs the child node object of `parent`, typed after the target variable.
        // `labelTopText` is applied only when it is not null.
        private static void createSplitChildNode(Node parent, string labelTopText)
        {
            if (Def.Schema.Target.VariableTypeUserSet == SchemaVariable.VariableTypeEnum.Continuous) {
                NodeTargetContinuous child = new NodeTargetContinuous(parent);
                if (labelTopText != null) {
                    child.LabelTopText = labelTopText;
                }
            } else {
                NodeTargetCategorical child = new NodeTargetCategorical(parent);
                if (labelTopText != null) {
                    child.LabelTopText = labelTopText;
                }
            }
        }

        // Builds "col='v1' or col='v2' ..." for the given category values. Embedded single
        // quotes are doubled so a value such as O'Brien cannot terminate the SQL literal.
        private static string buildCategoryFilter(string columnName, List<string> values)
        {
            List<string> clauses = new List<string>(values.Count);
            foreach (string value in values) {
                clauses.Add(columnName + "='" + (value ?? "").Replace("'", "''") + "'");
            }
            return string.Join(" or ", clauses.ToArray());
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Splits <paramref name="node"/> in two with its multivariate linear rule: for every
        /// row, the sum of <c>Coef * X(row)</c> over <c>node.PredMvLst</c> plus <c>node.C</c>
        /// is evaluated; rows scoring &lt;= 0 go to the first new reference table, the others
        /// to the second. Both tables are (re)created, filled in a single transaction and
        /// indexed, then two child nodes are constructed and <c>afterSplit</c> is called.
        /// </summary>
        /// <param name="node">Node to split.</param>
        /// <returns>
        /// <c>false</c> (nothing done) when <c>node.Level</c> has reached
        /// <c>Def.TreeLevelsMax</c>; <c>true</c> otherwise.
        /// </returns>
        public bool AutoSplitMv(Node node)
        {
            if (node.Level >= Def.TreeLevelsMax) {
                return false;
            }

            int leftId = Node.CountId + 1;
            int rightId = leftId + 1;

            // Left table: dropped if present, then created empty.
            Def.Db.TableDropIfExists(Def.DbTrTb + leftId);
            Def.Db.ReferenceTableIndexDropIfExists(leftId);
            Def.Db.ExecuteNonQuery(
                @"CREATE TABLE " + Def.DbTrTb + leftId + "(" + Def.DbTableIdName + " integer NOT NULL)");

            // Right table: same treatment.
            Def.Db.TableDropIfExists(Def.DbTrTb + rightId);
            Def.Db.ReferenceTableIndexDropIfExists(rightId);
            Def.Db.ExecuteNonQuery(
                @"CREATE TABLE " + Def.DbTrTb + rightId + "(" + Def.DbTableIdName + " integer NOT NULL)");

            // One INSERT per row, routed by the sign of the linear score.
            List<string> statements = new List<string>();
            for (int row = 0; row < node.Table.RowCount; ++row) {
                double score = 0;
                for (int k = 0; k < node.PredMvLst.Count; ++k) {
                    score += node.PredMvLst[k].Coef * node.PredMvLst[k].X(row);
                }

                int destinationId = (score + node.C) <= 0 ? leftId : rightId;
                statements.Add(@"insert into " + Def.DbTrTb + destinationId + " values (" + node.MvTb.Data.ID[row] + ")");
            }
            Def.Db.NonQueryTransaction(statements);

            Def.Db.ReferenceTableIndexCreate(leftId);
            Def.Db.ReferenceTableIndexCreate(rightId);

            // Two children are constructed back to back; the instances are not used here.
            // NOTE(review): how each new node binds to one of the two tables is decided
            // inside the node constructors, which are not visible from this method.
            if (Def.Schema.Target.VariableTypeUserSet == SchemaVariable.VariableTypeEnum.Continuous) {
                new NodeTargetContinuous(node);
                new NodeTargetContinuous(node);
            } else {
                new NodeTargetCategorical(node);
                new NodeTargetCategorical(node);
            }

            afterSplit(node);
            return true;
        }
Ejemplo n.º 5
0
        /// <summary>
        /// Builds the grid control that displays one continuous-target node: a two-column,
        /// six-row read-only grid (node id, mean, standard deviation, coefficient of
        /// variation, row count and percentage of the training set), a right-click context
        /// menu with five actions, and two auto-sized labels. The grid and both labels are
        /// added to <c>Def.PbBase.Controls</c>.
        /// </summary>
        /// <param name="node">Node whose statistics are shown; stored in <c>this.Node</c>.</param>
        public NodeTargetContinuousUI(NodeTargetContinuous node)
        {
            // x = column index, y = row index used while filling the grid cells below.
            int x, y;
            x = y = 0;

            // The control registers itself on the shared drawing surface before it is configured.
            Def.PbBase.Controls.Add(this);
            this.components = new System.ComponentModel.Container();
            this.Selector = new System.Windows.Forms.ContextMenuStrip(this.components);
            this.Selector.GripMargin = new System.Windows.Forms.Padding(2);
            this.Selector.SuspendLayout();
            this.SuspendLayout();
            //            this.AutoSizeColumnsMode = System.Windows.Forms.DataGridViewAutoSizeColumnsMode.AllCells;
            //            this.AutoSizeRowsMode = System.Windows.Forms.DataGridViewAutoSizeRowsMode.AllCells;

            this.Node = node;
            // Labels live on Def.PbBase (siblings of the grid), not inside the grid.
            LabelTop = new Label();
            LabelBottom = new Label();
            // NOTE(review): no matching ResumeLayout for the two labels appears in this constructor.
            LabelTop.SuspendLayout();
            LabelBottom.SuspendLayout();
            Def.PbBase.Controls.Add(this.LabelTop);
            Def.PbBase.Controls.Add(this.LabelBottom);
            LabelTop.AutoSize = true;
            LabelBottom.AutoSize = true;
            this.viewdataToolStripMenuItem = new System.Windows.Forms.ToolStripMenuItem();
            this.manuallysplitToolStripMenuItem = new System.Windows.Forms.ToolStripMenuItem();
            this.autosplitToolStripMenuItem = new System.Windows.Forms.ToolStripMenuItem();
            this.fullautogrowToolStripMenuItem = new System.Windows.Forms.ToolStripMenuItem();
            this.removeSubNodesToolStripMenuItem = new System.Windows.Forms.ToolStripMenuItem();
            this.MouseHover += new System.EventHandler(grid_MouseHover);
            this.MouseLeave += new System.EventHandler(grid_MouseLeave);
            this.ContextMenuStrip = this.Selector;
            //
            // Selector (context menu holding the five node actions)
            //
            this.Selector.AllowDrop = true;
            this.Selector.Items.AddRange(new System.Windows.Forms.ToolStripItem[] {
            this.viewdataToolStripMenuItem,
            this.manuallysplitToolStripMenuItem,
            this.autosplitToolStripMenuItem,
            this.fullautogrowToolStripMenuItem,
            this.removeSubNodesToolStripMenuItem});
            this.Selector.Location = new System.Drawing.Point(23, 61);
            this.Selector.Name = "Menu";
            this.Selector.Size = new System.Drawing.Size(133, 148);
            this.Selector.Visible = true;
            //
            // viewdataToolStripMenuItem
            //
            this.viewdataToolStripMenuItem.Name = "viewdataToolStripMenuItem";
            this.viewdataToolStripMenuItem.Text = "View data";
            this.viewdataToolStripMenuItem.Click += new System.EventHandler(this.viewdataToolStripMenuItem_Click);
            //
            // manuallysplitToolStripMenuItem
            //
            this.manuallysplitToolStripMenuItem.Name = "manuallysplitToolStripMenuItem";
            this.manuallysplitToolStripMenuItem.Text = "Manually split";
            this.manuallysplitToolStripMenuItem.Click += new System.EventHandler(this.manuallysplitToolStripMenuItem_Click);
            //
            // autosplitToolStripMenuItem
            //
            this.autosplitToolStripMenuItem.Name = "autosplitToolStripMenuItem";
            this.autosplitToolStripMenuItem.Text = "Auto split";
            this.autosplitToolStripMenuItem.Click += new System.EventHandler(this.autosplitToolStripMenuItem_Click);
            //
            // fullautogrowToolStripMenuItem
            //
            this.fullautogrowToolStripMenuItem.Name = "fullautogrowToolStripMenuItem";
            this.fullautogrowToolStripMenuItem.Text = "Full auto grow";
            this.fullautogrowToolStripMenuItem.Click += new System.EventHandler(this.fullautogrowToolStripMenuItem_Click);
            //
            // removeSubNodesToolStripMenuItem
            //
            this.removeSubNodesToolStripMenuItem.Name = "removeSubNodesToolStripMenuItem";
            this.removeSubNodesToolStripMenuItem.Text = "Remove subnodes";
            this.removeSubNodesToolStripMenuItem.Click += new System.EventHandler(this.removeSubNodesToolStripMenuItem_Click);

            // Grid appearance: Style1 is used for alternating rows and column C1,
            // Style2 for the (hidden) column headers and column C2.
            DataGridViewCellStyle Style1 = new DataGridViewCellStyle();
            DataGridViewCellStyle Style2 = new DataGridViewCellStyle();
            C1 = new DataGridViewTextBoxColumn();
            C2 = new DataGridViewTextBoxColumn();
            // The grid is display-only: every user edit/resize/reorder option is turned off.
            AllowUserToAddRows = false;
            AllowUserToDeleteRows = false;
            AllowUserToOrderColumns = false;
            AllowUserToResizeColumns = false;
            AllowUserToResizeRows = false;
            Style1.Alignment = DataGridViewContentAlignment.TopLeft;
            Style1.WrapMode = DataGridViewTriState.False;
            AlternatingRowsDefaultCellStyle = Style1;
            CellBorderStyle = DataGridViewCellBorderStyle.SingleVertical;
            ClipboardCopyMode = DataGridViewClipboardCopyMode.Disable;
            Style2.Alignment = DataGridViewContentAlignment.TopLeft;
            Style2.BackColor = System.Drawing.SystemColors.Control;
            Style2.Font = new System.Drawing.Font("Microsoft Sans Serif", 8.25F, System.Drawing.FontStyle.Regular, System.Drawing.GraphicsUnit.Point, ((byte)(0)));
            Style2.ForeColor = System.Drawing.SystemColors.WindowText;
            Style2.SelectionBackColor = System.Drawing.SystemColors.Highlight;
            Style2.SelectionForeColor = System.Drawing.SystemColors.HighlightText;
            Style2.WrapMode = DataGridViewTriState.True;
            ColumnHeadersDefaultCellStyle = Style2;
            ColumnHeadersVisible = false;
            Columns.Add(this.C1);
            Columns.Add(this.C2);
            Cursor = Cursors.Hand;
            EditMode = DataGridViewEditMode.EditProgrammatically;
            Margin = new Padding(0);
            MultiSelect = false;
            Name = "nodeContinuous";
            ReadOnly = true;
            RowHeadersVisible = false;
            RowTemplate.Height = 18;
            ScrollBars = ScrollBars.None;
            //            SelectionMode = DataGridViewSelectionMode.ColumnHeaderSelect;
            // C1 (width 50) holds the row captions, C2 (width 60) the values.
            this.C1.DefaultCellStyle = Style1;
            this.C1.Frozen = true;
            this.C1.HeaderText = "Column1";
            this.C1.Name = "c1";
            this.C1.ReadOnly = true;
            this.C1.Width = 50;
            this.C2.DefaultCellStyle = Style2;
            this.C2.Frozen = true;
            this.C2.HeaderText = "Column2";
            this.C2.Name = "c2";
            this.C2.ReadOnly = true;
            this.C2.Width = 60;

            // First column: one caption per row, top to bottom.
            Rows.Add(6);
            Rows[y].Cells[x].Value = "Node " + Node.Id; ++y;
            Rows[y].Cells[x].Value = "Mean"; ++y;
            Rows[y].Cells[x].Value = "Std Dev"; ++y;
            Rows[y].Cells[x].Value = "Cf Var"; ++y;
            Rows[y].Cells[x].Value = "n"; ++y;
            Rows[y].Cells[x].Value = "%"; ++y;
            // Second column starts at row 1: the cell next to the "Node <id>" caption stays empty.
            ++x; y = 1;

            double mean = Node.Mean;
            double stdDev = Node.StdDev;

            Rows[y].Cells[x].Value = Math.Round(mean, 2); ++y;
            Rows[y].Cells[x].Value = Math.Round(stdDev, 2); ++y;
            // "Cf Var" is stdDev / mean; with mean == 0 the double division yields a
            // non-finite value (Infinity or NaN) rather than throwing.
            Rows[y].Cells[x].Value = Math.Round(stdDev / mean, 2); ++y;
            Rows[y].Cells[x].Value = Node.Table.RowCount; ++y;
            // Share of the training set that reached this node, as a percentage.
            Rows[y].Cells[x].Value = Math.Round((double)Node.Table.RowCount / (double)Def.TrainingSetRowCount * (double)100, 2); ++y;
            // x and y are not read again after this line.
            ++x; y = 1;

            //            AutoSizeColumn(DataGridViewAutoSizeColumnCriteria.HeaderAndRows, 0);
            //            AutoSizeColumn(DataGridViewAutoSizeColumnCriteria.HeaderAndRows, 1);

            LabelTop.Text = "";
            LabelBottom.Text = "";

            // Fixed size: six rows of RowTemplate.Height plus 4 pixels, 110 wide (columns are 50 + 60).
            Height = (RowTemplate.Height * 6) + 4;
            Width = 110;
            // "Remove subnodes" starts disabled.
            removeSubNodesToolStripMenuItem.Enabled = false;
            this.Selector.ResumeLayout(false);
            this.Selector.Hide();
            this.ResumeLayout(false);
        }
Ejemplo n.º 6
0
 private void treeInitialise()
 {
     // Resets the tree to a bare root whose concrete type follows the type the user
     // assigned to the target variable. The tree is flagged as not displayable and,
     // unlike the dialog handlers, no rebuild of the main form's tree is triggered.
     bool targetIsCategorical =
         Def.Schema.Target.VariableTypeUserSet == SchemaVariable.VariableTypeEnum.Categorical;

     if (targetIsCategorical) {
         Def.Tree.Root = new NodeTargetCategorical();
     } else {
         Def.Tree.Root = new NodeTargetContinuous();
     }

     Def.Tree.GrowthState = Tree.GrowthStateEnum.Root;
     Def.TreeCanBeDisplayed = false;
 }
Ejemplo n.º 7
0
        //============================================================================================================
        //============================================================================================================
        //Sets the best p.SplitValue p.Gain
        //public static double MinInfoCatHeuristicSLOW(NodeTargetContinuous n, Predictor p) {
        //    int i, bestPartitionSplitPoint = 0;
        //    //            int dfd; //delete
        //    double minVar = 0, var, lVar, rVar, instanceCount = n.Table.RowCount;
        //    lVar = rVar = minVar = var = 0;
        //    List<NNT> nntLst;
        //    List<string> left = new List<string>();
        //    List<string> right = new List<string>();
        //    List<string> leftBest = new List<string>();
        //    List<string> rightBest = new List<string>();
        //    //Tries each partition:
        //    //AvsLst[i].N0 = y
        //    //AvsLst[i].N1 = Value of the dependent varible
        //    //AvsLst[i].N2 = Frequency of y
        //    //AvsLst[i].N3 = Total of distinct registries until that row
        //    string sql =
        //    @"SELECT count(*), " +
        //           "0, " +
        //           Def.DbBsTb + "." + p.Variable.Name + " " +
        //    "FROM "
        //        + Def.DbBsTb + "," + Def.DbTrTb + n.Id + " " +
        //    "WHERE "
        //        + Def.DbBsTb + "." + Def.DbTableIdName + " = " +
        //        Def.DbTrTb + n.Id + "." + Def.DbTableIdName + " " +
        //    "GROUP BY " +
        //        Def.DbBsTb + "." + p.Variable.Name + " " +
        //    "ORDER BY avg("
        //        + Def.DbBsTb + "." + Def.Schema.Target.Name + ")";
        //    nntLst = Def.Db.GetNNTLst(sql);
        //    //N1 is the number of registries until a given row
        //    left.Add(nntLst[0].T);
        //    leftBest.Add(nntLst[0].T);
        //    if (nntLst.Count > 0) {
        //        nntLst[0].N1 = nntLst[0].N0;
        //        for (i = 1; i < nntLst.Count; ++i) {
        //            nntLst[i].N1 = nntLst[i - 1].N1 + nntLst[i].N0;
        //            right.Add(nntLst[i].T);
        //            rightBest.Add(nntLst[i].T);
        //        }
        //    }
        //    n.DescendentImpPreCalculated = new List<double>(2);
        //    n.DescendentImpPreCalculated.Add(0);
        //    n.DescendentImpPreCalculated.Add(0);
        //    p.SplitStatus = Predictor.SplitStatusEnum.CanBeUsed;
        //    if (nntLst.Count == 0) {
        //        p.SplitStatus = Predictor.SplitStatusEnum.NotEnoughCases;
        //        p.Gain = 0;
        //        return 0;
        //    }
        //    for (i = 0; i < nntLst.Count - 1; ++i) {
        //        //if(i==12 && p.Variable.Name=="apache_2")
        //        //    dfd=8;
        //        lVar = Fcn.InfoTimesCount(left, n, p);
        //        rVar = Fcn.InfoTimesCount(right, n, p);
        //        if (i == 0) {
        //            minVar = (lVar + rVar) / instanceCount;
        //            n.DescendentImpPreCalculated[0] = lVar;
        //            n.DescendentImpPreCalculated[1] = rVar;
        //        } else {
        //            var = (lVar + rVar) / instanceCount;
        //            if (var < minVar) {
        //                minVar = var;
        //                bestPartitionSplitPoint = i;
        //                n.DescendentImpPreCalculated[0] = lVar;
        //                n.DescendentImpPreCalculated[1] = rVar;
        //                leftBest.Clear();
        //                rightBest.Clear();
        //                foreach (string s in left)
        //                    leftBest.Add(s);
        //                foreach (string s in right)
        //                    rightBest.Add(s);
        //            }
        //        }
        //        left.Add(right[0]);
        //        right.RemoveAt(0);
        //    }
        //    p.ChildrenGroups.ValueLst[0].Clear();
        //    p.ChildrenGroups.ValueLst[1].Clear();
        //    //Add left node values
        //    for (i = 0; i <= bestPartitionSplitPoint; ++i)
        //        p.ChildrenGroups.ValueLst[0].Add(p.CaseSd.IndexOfKey(nntLst[i].T));
        //    //Add right node values
        //    for (i = bestPartitionSplitPoint + 1; i < nntLst.Count; ++i)
        //        p.ChildrenGroups.ValueLst[1].Add(p.CaseSd.IndexOfKey(nntLst[i].T));
        //    p.Gain = (n.Imp - minVar) * 100 / n.Imp;
        //    return p.Gain;
        //}
        ////============================================================================================================
        ////============================================================================================================
        //Sets the best p.SplitValue p.Gain
        /// <summary>
        /// Searches the best binary threshold on the continuous predictor <paramref name="p"/>
        /// for node <paramref name="n"/>: every candidate threshold is scored with the
        /// row-count-weighted average of <c>Info</c> over the left and right partitions, and
        /// the lowest score wins. On success <c>p.SplitValue</c>, <c>p.Gain</c>,
        /// <c>p.SplitStatus</c> and <c>n.DescendentImpPreCalculated</c> (left, right) are set.
        /// </summary>
        /// <param name="n">Node being split; its reference table is joined to the base table.</param>
        /// <param name="p">Predictor to evaluate; rows where it is NULL are excluded by the query.</param>
        /// <returns>
        /// The gain, as a percentage of <c>n.Imp</c> scaled by the fraction of non-null rows;
        /// 0 (with <c>SplitStatus = NotEnoughCases</c>) when no admissible threshold exists.
        /// </returns>
        public static double MinInfoCont(NodeTargetContinuous n, Predictor p)
        {
            double nodeRowCount = n.Table.RowCount;

            string targetColumn = Def.DbBsTb + "." + Def.Schema.Target.Name;
            string predictorColumn = Def.DbBsTb + "." + p.Variable.Name;

            // One row per distinct (target, predictor) pair, ordered by predictor value.
            // Selected columns: target value, predictor value, pair frequency, constant 0.
            // NOTE(review): presumably GetN4Lst maps these to N0..N3 in that order - confirm.
            // N2 is used below as the frequency and N3 as a running total of N2.
            string query =
                @"SELECT ALL " + targetColumn + ", " + predictorColumn + ", " +
                " count(" + predictorColumn + "), 0 " +
                "FROM " + Def.DbBsTb + "," + Def.DbTrTb + n.Id + " " +
                "WHERE " + Def.DbBsTb + "." + Def.DbTableIdName + " = " +
                Def.DbTrTb + n.Id + "." + Def.DbTableIdName + " " +
                " AND " + predictorColumn + " IS NOT NULL " +
                "GROUP BY " + targetColumn + ", " + predictorColumn + " " +
                "ORDER BY " + predictorColumn;
            List<N4> rows = Def.Db.GetN4Lst(query);

            // N3 accumulates the frequencies up to and including each row.
            if (rows.Count > 0) {
                rows[0].N3 = rows[0].N2;
                for (int k = 1; k < rows.Count; ++k) {
                    rows[k].N3 = rows[k - 1].N3 + rows[k].N2;
                }
            }

            List<int> candidates = Fcn.SetPossibleThresholdIndexLst(rows);
            if (candidates.Count == 0) {
                p.SplitStatus = Predictor.SplitStatusEnum.NotEnoughCases;
                p.Gain = 0;
                return 0;
            }

            // Slots 0 and 1 receive the left/right impurity of the best threshold found.
            n.DescendentImpPreCalculated = new List<double>(2);
            n.DescendentImpPreCalculated.Add(0);
            n.DescendentImpPreCalculated.Add(0);

            p.SplitStatus = Predictor.SplitStatusEnum.CanBeUsed;

            if (rows.Count == 0) {
                p.SplitStatus = Predictor.SplitStatusEnum.NotEnoughCases;
                p.Gain = 0;
                return 0;
            }

            // NaN means "no admissible threshold seen yet".
            double bestScore = double.NaN;
            foreach (int cut in candidates) {
                int leftCount, rightCount;
                double leftInfo = Info(0, cut, rows, out leftCount);
                double rightInfo = Info(cut + 1, rows.Count - 1, rows, out rightCount);

                // Both partitions must hold at least the configured minimum number of cases.
                if (leftCount < Def.TreeMinNumberOfCasesPerNode || rightCount < Def.TreeMinNumberOfCasesPerNode) {
                    continue;
                }

                double score = (leftCount * leftInfo + rightCount * rightInfo) / (leftCount + rightCount);
                if (Double.IsNaN(score)) {
                    continue;
                }

                if (Double.IsNaN(bestScore) || score < bestScore) {
                    bestScore = score;
                    p.SplitValue = rows[cut].N1;
                    n.DescendentImpPreCalculated[0] = leftInfo;
                    n.DescendentImpPreCalculated[1] = rightInfo;
                }
            }

            if (Double.IsNaN(bestScore)) {
                p.SplitStatus = Predictor.SplitStatusEnum.NotEnoughCases;
                p.Gain = 0;
                return 0;
            }

            // Relative impurity reduction in percent, then scaled down by the share of
            // rows for which the predictor is not null.
            p.Gain = (n.Imp - bestScore) * 100 / n.Imp;
            p.Gain *= (nodeRowCount - p.NullCount) / nodeRowCount;
            return p.Gain;
        }
Ejemplo n.º 8
0
        //Sets the best p.SplitValue p.Gain
        //public static double MinInfoContSQL(NodeTargetContinuous n, Predictor p) {
        //    int i;
        //    double l, r, minVar, var;
        //    l = r = minVar = var = 0;
        //    //Tries each partition:
        //    //p.Gain = (n.Imp - minVar) * 100 / n.Imp;
        //    List<double> vlLst;
        //    List<double> NLst;
        //    OdbcTransaction dbTrans = null;
        //    dbTrans = Def.Db.Con.BeginTransaction();
        //    string sql =
        //    @"SELECT DISTINCT " +
        //        Def.DbBsTb + "." + p.Variable.Name + " " +
        //    "FROM " +
        //        Def.DbBsTb + "," + Def.DbTrTb + n.Id + " " +
        //    "WHERE " +
        //        Def.DbBsTb + "." + Def.DbTableIdName + " = " +
        //        Def.DbTrTb + n.Id + "." + Def.DbTableIdName + " " +
        //    "ORDER BY " +
        //        Def.DbBsTb + "." + p.Variable.Name;
        //    vlLst = Def.Db.GetNumberLst(sql, dbTrans);
        //    p.SplitStatus = Predictor.SplitStatusEnum.CanBeUsed;
        //    if (vlLst.Count == 0) {
        //        p.SplitStatus = Predictor.SplitStatusEnum.NotEnoughCases;
        //        p.Gain = 0;
        //        return 0;
        //    }
        //    for (i = 0; i < vlLst.Count; ++i) {
        //        sql =
        //       @"SELECT
        //            COALESCE(variance(" + n.Tree.Schema.Target.Name + "), 0), count(*) " +
        //        "FROM "
        //        + Def.DbBsTb + " , " + Def.DbTrTb + n.Id + " " +
        //        "WHERE "
        //            + Def.DbBsTb + "." + Def.DbTableIdName + "=" +
        //            Def.DbTrTb + n.Id + "." + Def.DbTableIdName + " and " +
        //            Def.DbBsTb + "." + p.Variable.Name + "<=" + vlLst[i];
        //        NLst = Def.Db.GetNumberRowLst(sql, dbTrans);
        //        l = NLst[0] * NLst[1] / n.Table.RowCount;
        //        sql =
        //       @"SELECT
        //            COALESCE(variance(" + n.Tree.Schema.Target.Name + "), 0), count(*) " +
        //        "FROM "
        //        + Def.DbBsTb + " , " + Def.DbTrTb + n.Id + " " +
        //        "WHERE "
        //            + Def.DbBsTb + "." + Def.DbTableIdName + "=" +
        //            Def.DbTrTb + n.Id + "." + Def.DbTableIdName + " and " +
        //            Def.DbBsTb + "." + p.Variable.Name + ">" + vlLst[i];
        //        NLst = Def.Db.GetNumberRowLst(sql, dbTrans);
        //        r = NLst[0] * NLst[1] / n.Table.RowCount;
        //        var = (l + r);
        //        if (i == 0) {
        //            minVar = var;
        //            p.SplitValue = vlLst[i];
        //        } else {
        //            if (var < minVar) {
        //                minVar = var;
        //                p.SplitValue = vlLst[i];
        //            }
        //        }
        //    }
        //    dbTrans.Commit();
        //    p.Gain = (n.Imp - minVar) * 100 / n.Imp; ;
        //    return p.Gain;
        //}
        //============================================================================================================
        //============================================================================================================
        //Sets p.SplitStatus, the two value groups in p.ChildrenGroups (best two-way split of p's categories) and p.Gain
        //Used to be:        public static double MinInfoCatHeuristic(NodeTargetContinuous n, Predictor p) {
        // Finds the best two-way grouping of the categories of predictor p at node n.
        //
        // The distinct non-null values of p (rows of Def.DbBsTb joined to the node table
        // Def.DbTrTb + n.Id) are fetched ordered by the average of the target column.
        // Every proper prefix of that ordering is tried as the "left" group and scored
        // with PartitionInfo; the prefix with the lowest non-NaN score wins.
        //
        // Effects on p:
        //   SplitStatus    - NotEnoughCases when no split can be scored, CanBeUsed otherwise
        //   ChildrenGroups - ValueGroupLst[0] / ValueGroupLst[1] get the left / right values
        //   Gain           - percentage reduction relative to n.Imp, scaled by
        //                    (n.Table.RowCount - p.NullCount) / n.Table.RowCount
        //
        // Returns p.Gain, or 0 when no split is possible.
        public static double MinInfoCat(NodeTargetContinuous n, Predictor p)
        {
            // Pieces shared by both queries, so the join/filter is written only once.
            string nodeTable = Def.DbTrTb + n.Id;
            string predictorColumn = Def.DbBsTb + "." + p.Variable.Name;
            string targetColumn = Def.DbBsTb + "." + Def.Schema.Target.Name;
            string fromWhere =
                "FROM " + Def.DbBsTb + "," + nodeTable + " " +
                "WHERE " + Def.DbBsTb + "." + Def.DbTableIdName + " = " +
                    nodeTable + "." + Def.DbTableIdName + " " +
                    "AND " + predictorColumn + " IS NOT NULL ";

            // Distinct predictor values, ordered by the mean of the target.
            string sqlAverage =
                "SELECT " + predictorColumn + " " +
                fromWhere +
                "GROUP BY " + predictorColumn + " " +
                "ORDER BY avg(" + targetColumn + ")";
            List<string> valLst = Def.Db.GetTextLst(sqlAverage);

            // A two-way split needs at least two distinct values. With a single value
            // no candidate can be scored, so skip the second query altogether.
            if (valLst.Count < 2) {
                p.SplitStatus = Predictor.SplitStatusEnum.NotEnoughCases;
                p.Gain = 0;
                return 0;
            }

            // Row counts per (target value, predictor value) pair, consumed by PartitionInfo.
            string sqlDepVar =
                "SELECT " + targetColumn + ", " + "count(*), " + predictorColumn + " " +
                fromWhere +
                "GROUP BY " + targetColumn + ", " + predictorColumn;
            List<NNT> DepIndepLst = Def.Db.GetNNTLst(sqlDepVar);

            p.SplitStatus = Predictor.SplitStatusEnum.CanBeUsed;

            // Grow the left group one value at a time and keep the lowest-scoring prefix.
            double minVar = double.NaN;
            int bestPartitionSplitPoint = 0;
            List<string> left = new List<string>();
            left.Add(valLst[0]);
            for (int i = 0; i < valLst.Count - 1; ++i) {
                double partitionInfo = PartitionInfo(left, DepIndepLst, n, p);
                // NaN scores are ignored; the first non-NaN score always becomes the minimum.
                if (!Double.IsNaN(partitionInfo) && (Double.IsNaN(minVar) || partitionInfo < minVar)) {
                    minVar = partitionInfo;
                    bestPartitionSplitPoint = i;
                }
                left.Add(valLst[i + 1]);
            }

            // Every candidate scored NaN: nothing usable.
            if (Double.IsNaN(minVar)) {
                p.SplitStatus = Predictor.SplitStatusEnum.NotEnoughCases;
                p.Gain = 0;
                return 0;
            }

            p.ChildrenGroups.ValueGroupLst[0].Clear();
            p.ChildrenGroups.ValueGroupLst[1].Clear();

            // Left node values: everything up to and including the best split point.
            for (int i = 0; i <= bestPartitionSplitPoint; ++i) {
                p.ChildrenGroups.ValueGroupLst[0].Add(valLst[i]);
            }
            // Right node values: the remainder.
            for (int i = bestPartitionSplitPoint + 1; i < valLst.Count; ++i) {
                p.ChildrenGroups.ValueGroupLst[1].Add(valLst[i]);
            }

            // NOTE(review): if n.Imp is 0 this division yields NaN or Infinity - confirm
            // that callers never evaluate a node whose impurity is already zero.
            p.Gain = (n.Imp - minVar) * 100 / n.Imp;
            // Scale the gain by the fraction of rows not counted in p.NullCount.
            p.Gain *= (double)(n.Table.RowCount - p.NullCount) / n.Table.RowCount;

            return p.Gain;
        }