/// <summary>
/// Click handler for the Next button. Stores the split criterion selected by
/// the radio buttons in <c>Def.Tree.Algorithm</c>, sets the main form caption
/// accordingly, installs a new root node, calls <c>Def.FrmMain.TreeBuild()</c>,
/// enables the "btNew" toolbar item and finally calls <c>Close()</c>.
/// </summary>
/// <param name="sender">Event source (not read).</param>
/// <param name="e">Event data (not read).</param>
private void btNext_Click(object sender, EventArgs e) {
    // Stays null when none of the radio buttons is checked; in that case
    // neither the algorithm nor the caption is modified.
    string caption = null;

    if (rbGini.Checked) {
        Def.Tree.Algorithm = Tree.AlgorithmEnum.Gini;
        caption = Def.APPLICATION_NAME + " using Gini index " + "on the " + Def.DbTableInUse + " data set";
    } else if (rbEntropy.Checked) {
        Def.Tree.Algorithm = Tree.AlgorithmEnum.Entropy;
        caption = Def.APPLICATION_NAME + " using Information Entropy" + " on the " + Def.DbTableInUse + " data set";
    } else if (rbMaxDif.Checked) {
        Def.Tree.Algorithm = Tree.AlgorithmEnum.MaxDif;
        caption = Def.APPLICATION_NAME + " using MaxDif" + " on the " + Def.DbTableInUse + " data set";
    }
    // Disabled option, kept for reference:
    //else if (rbHybrid.Checked == true) {
    //    Def.Tree.Algorithm = Tree.AlgorithmEnum.Hybrid;
    //    Def.FrmMain.Text = Def.APPLICATION_NAME + " using mixed algorithms" + " on the " + Def.DbTableInUse + " data set"; ;
    //}

    if (caption != null) {
        Def.FrmMain.Text = caption;
    }

    // The concrete root node type follows the target variable type.
    if (Def.Schema.Target.VariableTypeUserSet != SchemaVariable.VariableTypeEnum.Categorical) {
        NodeTargetContinuous continuousRoot = new NodeTargetContinuous();
        Def.Tree.Root = continuousRoot;
    } else {
        NodeTargetCategorical categoricalRoot = new NodeTargetCategorical();
        Def.Tree.Root = categoricalRoot;
    }

    Def.Tree.GrowthState = Tree.GrowthStateEnum.Root;
    Def.TreeCanBeDisplayed = true;
    Def.FrmMain.TreeBuild();
    Def.ToolBar.Items["btNew"].Enabled = true;
    Close();
}
/// <summary>
/// Click handler for the Next button. Stores the variance-reduction criterion
/// selected by the radio buttons in <c>Def.Tree.Algorithm</c>, sets the main
/// form caption accordingly, installs a new root node, calls
/// <c>Def.FrmMain.TreeBuild()</c>, enables the "btNew" toolbar item and
/// finally calls <c>Close()</c>.
/// </summary>
/// <param name="sender">Event source (not read).</param>
/// <param name="e">Event data (not read).</param>
private void btNext_Click(object sender, EventArgs e) {
    // Stays null when neither radio button is checked; in that case neither
    // the algorithm nor the caption is modified.
    string caption = null;

    if (rbNetReductionInVariance.Checked) {
        Def.Tree.Algorithm = Tree.AlgorithmEnum.NetRiV;
        caption = Def.APPLICATION_NAME + " using Net Reduction in Variance" + " on the " + Def.DbTableInUse + " data set";
    } else if (rbGrossReductionInVariance.Checked) {
        Def.Tree.Algorithm = Tree.AlgorithmEnum.GrossRiV;
        caption = Def.APPLICATION_NAME + " using Gross Reduction in Variance" + " on the " + Def.DbTableInUse + " data set";
    }

    if (caption != null) {
        Def.FrmMain.Text = caption;
    }

    // The concrete root node type follows the target variable type.
    if (Def.Schema.Target.VariableTypeUserSet != SchemaVariable.VariableTypeEnum.Categorical) {
        NodeTargetContinuous continuousRoot = new NodeTargetContinuous();
        Def.Tree.Root = continuousRoot;
    } else {
        NodeTargetCategorical categoricalRoot = new NodeTargetCategorical();
        Def.Tree.Root = categoricalRoot;
    }

    Def.Tree.GrowthState = Tree.GrowthStateEnum.Root;
    Def.TreeCanBeDisplayed = true;
    Def.FrmMain.TreeBuild();
    Def.ToolBar.Items["btNew"].Enabled = true;
    Close();
}
/// <summary>
/// Splits <paramref name="node"/> into a left and a right child using the
/// split variable already stored on the node.
/// For a continuous split variable the left child receives the rows with
/// <c>value &lt;= node.SplitValue</c> and the right child the rows with
/// <c>value &gt; node.SplitValue</c>. For a categorical split variable the
/// children receive the rows matching <c>ChildrenGroups.ValueGroupLst[0]</c>
/// and <c>ValueGroupLst[1]</c> respectively. Any other split variable type
/// creates no children. <c>afterSplit(node)</c> is called in every case.
/// </summary>
/// <param name="node">Node to split; its reference table is the row source.</param>
public void ManuallySplit(Node node) {
    string splitColumn = node.SplitVariable.Variable.Name;

    if (node.SplitVariable.Variable.VariableTypeUserSet == SchemaVariable.VariableTypeEnum.Continuous) {
        // The threshold is embedded in SQL text, so it must use '.' as the
        // decimal separator regardless of the current UI culture.
        string threshold = node.SplitValue.ToString(System.Globalization.CultureInfo.InvariantCulture);

        // Left child. The label is display text and keeps the current culture.
        createChildReferenceTable(node, splitColumn + " <= " + threshold);
        createChildNode(node, " <= " + Math.Round(node.SplitValue, 2));

        // Right child.
        createChildReferenceTable(node, splitColumn + " > " + threshold);
        createChildNode(node, " > " + Math.Round(node.SplitValue, 2));
    } else if (node.SplitVariable.Variable.VariableTypeUserSet == SchemaVariable.VariableTypeEnum.Categorical) {
        // Left child.
        createChildReferenceTable(node, buildValueGroupCondition(splitColumn, node.SplitVariable.ChildrenGroups.ValueGroupLst[0]));
        createChildNode(node, null);

        // Right child.
        createChildReferenceTable(node, buildValueGroupCondition(splitColumn, node.SplitVariable.ChildrenGroups.ValueGroupLst[1]));
        createChildNode(node, null);
    }

    afterSplit(node);
}

// Creates (replacing any existing one) the reference table and index for the
// next child of 'node', holding the ids of the parent's rows that satisfy 'condition'.
private void createChildReferenceTable(Node node, string condition) {
    // The id is re-read for every child.
    // NOTE(review): this assumes constructing a child node advances Node.CountId - confirm.
    int childId = Node.CountId + 1;
    string parentTable = Def.DbTrTb + node.Id;

    Def.Db.TableDropIfExists(Def.DbTrTb + childId);
    Def.Db.ReferenceTableIndexDropIfExists(childId);

    string sql =
        "CREATE TABLE " + Def.DbTrTb + childId + " AS " +
        "SELECT " + parentTable + "." + Def.DbTableIdName +
        " FROM " + parentTable + ", " + Def.DbBsTb +
        " WHERE (" + parentTable + "." + Def.DbTableIdName + "=" + Def.DbBsTb + "." + Def.DbTableIdName + ")" +
        " AND (" + condition + ")";
    Def.Db.ExecuteNonQuery(sql);

    Def.Db.ReferenceTableIndexCreate(childId);
}

// Constructs the child node object matching the target variable type and,
// when 'labelTopText' is not null, assigns it as the child's top label.
private void createChildNode(Node node, string labelTopText) {
    if (Def.Schema.Target.VariableTypeUserSet == SchemaVariable.VariableTypeEnum.Continuous) {
        NodeTargetContinuous child = new NodeTargetContinuous(node);
        if (labelTopText != null) {
            child.LabelTopText = labelTopText;
        }
    } else {
        NodeTargetCategorical child = new NodeTargetCategorical(node);
        if (labelTopText != null) {
            child.LabelTopText = labelTopText;
        }
    }
}

// Builds "col='a' OR col='b' ..." for the given values, doubling single
// quotes so a value containing ' cannot terminate the SQL literal early.
// NOTE(review): an empty value group yields an empty predicate; callers are
// presumed to pass non-empty groups - confirm.
private string buildValueGroupCondition(string column, List<string> values) {
    List<string> terms = new List<string>(values.Count);
    foreach (string value in values) {
        string escaped = (value ?? "").Replace("'", "''");
        terms.Add(column + "='" + escaped + "'");
    }
    return string.Join(" OR ", terms.ToArray());
}
/// <summary>
/// Splits <paramref name="node"/> into two children using the node's
/// multivariate predictor list: for every row the sum of
/// <c>Coef * X(row)</c> over <c>node.PredMvLst</c> plus <c>node.C</c> is
/// evaluated; rows with a result &lt;= 0 go to the first child table, all
/// others to the second.
/// </summary>
/// <param name="node">Node to split.</param>
/// <returns>
/// <c>false</c> (nothing is done) when <c>node.Level</c> has reached
/// <c>Def.TreeLevelsMax</c>; otherwise <c>true</c> after the split.
/// </returns>
public bool AutoSplitMv(Node node) {
    //if (node.SplitVariable.SplitStatus != Predictor.SplitStatusEnum.CanBeUsed)
    //    return false;
    if (node.Level >= Def.TreeLevelsMax) {
        return false;
    }

    int leftId = Node.CountId + 1;
    int rightId = leftId + 1;

    // (Re)create an empty id table for each child, left first.
    foreach (int childId in new int[] { leftId, rightId }) {
        Def.Db.TableDropIfExists(Def.DbTrTb + childId);
        Def.Db.ReferenceTableIndexDropIfExists(childId);
        Def.Db.ExecuteNonQuery(@"CREATE TABLE " + Def.DbTrTb + childId + "(" + Def.DbTableIdName + " integer NOT NULL)");
    }

    // One insert statement per row, routed by the sign of the linear combination.
    List<string> inserts = new List<string>();
    for (int row = 0; row < node.Table.RowCount; ++row) {
        double weightedSum = 0;
        for (int k = 0; k < node.PredMvLst.Count; ++k) {
            weightedSum += node.PredMvLst[k].Coef * node.PredMvLst[k].X(row);
        }

        int targetId = (weightedSum + node.C) <= 0 ? leftId : rightId;
        inserts.Add(@"insert into " + Def.DbTrTb + targetId + " values (" + node.MvTb.Data.ID[row] + ")");
    }
    Def.Db.NonQueryTransaction(inserts);

    Def.Db.ReferenceTableIndexCreate(leftId);
    Def.Db.ReferenceTableIndexCreate(rightId);

    // Two child node objects of the type matching the target variable; the
    // instances are not used further here.
    if (Def.Schema.Target.VariableTypeUserSet != SchemaVariable.VariableTypeEnum.Continuous) {
        new NodeTargetCategorical(node);
        new NodeTargetCategorical(node);
        // node equation goes here
    } else {
        new NodeTargetContinuous(node);
        new NodeTargetContinuous(node);
        // node equation goes here
    }

    afterSplit(node);
    return true;
}
/// <summary>
/// Builds the grid that displays a continuous-target node. The control adds
/// itself and its two labels to <c>Def.PbBase</c>, sets up the context menu
/// (view data, manual split, auto split, full auto grow, remove subnodes),
/// configures a read-only two-column grid and fills six rows with the node
/// id, mean, standard deviation, coefficient of variation, row count and the
/// row count as a percentage of <c>Def.TrainingSetRowCount</c>.
/// </summary>
/// <param name="node">Node whose statistics are displayed.</param>
public NodeTargetContinuousUI(NodeTargetContinuous node) {
    Def.PbBase.Controls.Add(this);

    components = new System.ComponentModel.Container();
    Selector = new ContextMenuStrip(components);
    Selector.GripMargin = new Padding(2);
    Selector.SuspendLayout();
    SuspendLayout();
    // AutoSizeColumnsMode = System.Windows.Forms.DataGridViewAutoSizeColumnsMode.AllCells;
    // AutoSizeRowsMode = System.Windows.Forms.DataGridViewAutoSizeRowsMode.AllCells;
    this.Node = node;

    // Labels shown above and below the grid.
    LabelTop = new Label();
    LabelBottom = new Label();
    LabelTop.SuspendLayout();
    LabelBottom.SuspendLayout();
    Def.PbBase.Controls.Add(LabelTop);
    Def.PbBase.Controls.Add(LabelBottom);
    LabelTop.AutoSize = true;
    LabelBottom.AutoSize = true;

    viewdataToolStripMenuItem = new ToolStripMenuItem();
    manuallysplitToolStripMenuItem = new ToolStripMenuItem();
    autosplitToolStripMenuItem = new ToolStripMenuItem();
    fullautogrowToolStripMenuItem = new ToolStripMenuItem();
    removeSubNodesToolStripMenuItem = new ToolStripMenuItem();
    MouseHover += grid_MouseHover;
    MouseLeave += grid_MouseLeave;
    ContextMenuStrip = Selector;

    // Context menu.
    Selector.AllowDrop = true;
    Selector.Items.AddRange(new ToolStripItem[] {
        viewdataToolStripMenuItem,
        manuallysplitToolStripMenuItem,
        autosplitToolStripMenuItem,
        fullautogrowToolStripMenuItem,
        removeSubNodesToolStripMenuItem
    });
    Selector.Location = new System.Drawing.Point(23, 61);
    Selector.Name = "Menu";
    Selector.Size = new System.Drawing.Size(133, 148);
    Selector.Visible = true;

    // Menu entries.
    viewdataToolStripMenuItem.Name = "viewdataToolStripMenuItem";
    viewdataToolStripMenuItem.Text = "View data";
    viewdataToolStripMenuItem.Click += viewdataToolStripMenuItem_Click;

    manuallysplitToolStripMenuItem.Name = "manuallysplitToolStripMenuItem";
    manuallysplitToolStripMenuItem.Text = "Manually split";
    manuallysplitToolStripMenuItem.Click += manuallysplitToolStripMenuItem_Click;

    autosplitToolStripMenuItem.Name = "autosplitToolStripMenuItem";
    autosplitToolStripMenuItem.Text = "Auto split";
    autosplitToolStripMenuItem.Click += autosplitToolStripMenuItem_Click;

    fullautogrowToolStripMenuItem.Name = "fullautogrowToolStripMenuItem";
    fullautogrowToolStripMenuItem.Text = "Full auto grow";
    fullautogrowToolStripMenuItem.Click += fullautogrowToolStripMenuItem_Click;

    removeSubNodesToolStripMenuItem.Name = "removeSubNodesToolStripMenuItem";
    removeSubNodesToolStripMenuItem.Text = "Remove subnodes";
    removeSubNodesToolStripMenuItem.Click += removeSubNodesToolStripMenuItem_Click;

    // Grid appearance. labelStyle is shared by the alternating rows and the
    // first column; valueStyle by the column headers and the second column.
    DataGridViewCellStyle labelStyle = new DataGridViewCellStyle();
    DataGridViewCellStyle valueStyle = new DataGridViewCellStyle();
    C1 = new DataGridViewTextBoxColumn();
    C2 = new DataGridViewTextBoxColumn();

    AllowUserToAddRows = false;
    AllowUserToDeleteRows = false;
    AllowUserToOrderColumns = false;
    AllowUserToResizeColumns = false;
    AllowUserToResizeRows = false;

    labelStyle.Alignment = DataGridViewContentAlignment.TopLeft;
    labelStyle.WrapMode = DataGridViewTriState.False;
    AlternatingRowsDefaultCellStyle = labelStyle;
    CellBorderStyle = DataGridViewCellBorderStyle.SingleVertical;
    ClipboardCopyMode = DataGridViewClipboardCopyMode.Disable;

    valueStyle.Alignment = DataGridViewContentAlignment.TopLeft;
    valueStyle.BackColor = System.Drawing.SystemColors.Control;
    valueStyle.Font = new System.Drawing.Font(
        "Microsoft Sans Serif",
        8.25F,
        System.Drawing.FontStyle.Regular,
        System.Drawing.GraphicsUnit.Point,
        (byte)0);
    valueStyle.ForeColor = System.Drawing.SystemColors.WindowText;
    valueStyle.SelectionBackColor = System.Drawing.SystemColors.Highlight;
    valueStyle.SelectionForeColor = System.Drawing.SystemColors.HighlightText;
    valueStyle.WrapMode = DataGridViewTriState.True;
    ColumnHeadersDefaultCellStyle = valueStyle;
    ColumnHeadersVisible = false;

    Columns.Add(C1);
    Columns.Add(C2);

    Cursor = Cursors.Hand;
    EditMode = DataGridViewEditMode.EditProgrammatically;
    Margin = new Padding(0);
    MultiSelect = false;
    Name = "nodeContinuous";
    ReadOnly = true;
    RowHeadersVisible = false;
    RowTemplate.Height = 18;
    ScrollBars = ScrollBars.None;
    // SelectionMode = DataGridViewSelectionMode.ColumnHeaderSelect;

    C1.DefaultCellStyle = labelStyle;
    C1.Frozen = true;
    C1.HeaderText = "Column1";
    C1.Name = "c1";
    C1.ReadOnly = true;
    C1.Width = 50;

    C2.DefaultCellStyle = valueStyle;
    C2.Frozen = true;
    C2.HeaderText = "Column2";
    C2.Name = "c2";
    C2.ReadOnly = true;
    C2.Width = 60;

    // Content: six rows, captions in column 0, statistics in column 1.
    // Row 0 carries only its caption (the node id).
    Rows.Add(6);
    string[] captions = new string[] { "Node " + Node.Id, "Mean", "Std Dev", "Cf Var", "n", "%" };
    for (int row = 0; row < captions.Length; ++row) {
        Rows[row].Cells[0].Value = captions[row];
    }

    double mean = Node.Mean;
    double stdDev = Node.StdDev;
    Rows[1].Cells[1].Value = Math.Round(mean, 2);
    Rows[2].Cells[1].Value = Math.Round(stdDev, 2);
    Rows[3].Cells[1].Value = Math.Round(stdDev / mean, 2);
    Rows[4].Cells[1].Value = Node.Table.RowCount;
    Rows[5].Cells[1].Value = Math.Round((double)Node.Table.RowCount / (double)Def.TrainingSetRowCount * 100.0, 2);

    // AutoSizeColumn(DataGridViewAutoSizeColumnCriteria.HeaderAndRows, 0);
    // AutoSizeColumn(DataGridViewAutoSizeColumnCriteria.HeaderAndRows, 1);
    LabelTop.Text = "";
    LabelBottom.Text = "";

    // Six rows plus a 4 pixel allowance.
    Height = (RowTemplate.Height * 6) + 4;
    Width = 110;

    removeSubNodesToolStripMenuItem.Enabled = false;
    Selector.ResumeLayout(false);
    Selector.Hide();
    ResumeLayout(false);
}
/// <summary>
/// Resets the tree to a single new root node whose concrete type follows the
/// target variable type, sets the growth state to <c>Root</c> and marks the
/// tree as not displayable. The tree is not rebuilt here.
/// </summary>
private void treeInitialise() {
    if (Def.Schema.Target.VariableTypeUserSet != SchemaVariable.VariableTypeEnum.Categorical) {
        NodeTargetContinuous continuousRoot = new NodeTargetContinuous();
        Def.Tree.Root = continuousRoot;
    } else {
        NodeTargetCategorical categoricalRoot = new NodeTargetCategorical();
        Def.Tree.Root = categoricalRoot;
    }

    Def.Tree.GrowthState = Tree.GrowthStateEnum.Root;
    Def.TreeCanBeDisplayed = false;
    //Def.FrmMain.TreeBuild();
}
//============================================================================================================ //============================================================================================================ //Sets the best p.SplitValue p.Gain //public static double MinInfoCatHeuristicSLOW(NodeTargetContinuous n, Predictor p) { // int i, bestPartitionSplitPoint = 0; // // int dfd; //delete // double minVar = 0, var, lVar, rVar, instanceCount = n.Table.RowCount; // lVar = rVar = minVar = var = 0; // List<NNT> nntLst; // List<string> left = new List<string>(); // List<string> right = new List<string>(); // List<string> leftBest = new List<string>(); // List<string> rightBest = new List<string>(); // //Tries each partition: // //AvsLst[i].N0 = y // //AvsLst[i].N1 = Value of the dependent varible // //AvsLst[i].N2 = Frequency of y // //AvsLst[i].N3 = Total of distinct registries until that row // string sql = // @"SELECT count(*), " + // "0, " + // Def.DbBsTb + "." + p.Variable.Name + " " + // "FROM " // + Def.DbBsTb + "," + Def.DbTrTb + n.Id + " " + // "WHERE " // + Def.DbBsTb + "." + Def.DbTableIdName + " = " + // Def.DbTrTb + n.Id + "." + Def.DbTableIdName + " " + // "GROUP BY " + // Def.DbBsTb + "." + p.Variable.Name + " " + // "ORDER BY avg(" // + Def.DbBsTb + "." 
+ Def.Schema.Target.Name + ")"; // nntLst = Def.Db.GetNNTLst(sql); // //N1 is the number of registries until a given row // left.Add(nntLst[0].T); // leftBest.Add(nntLst[0].T); // if (nntLst.Count > 0) { // nntLst[0].N1 = nntLst[0].N0; // for (i = 1; i < nntLst.Count; ++i) { // nntLst[i].N1 = nntLst[i - 1].N1 + nntLst[i].N0; // right.Add(nntLst[i].T); // rightBest.Add(nntLst[i].T); // } // } // n.DescendentImpPreCalculated = new List<double>(2); // n.DescendentImpPreCalculated.Add(0); // n.DescendentImpPreCalculated.Add(0); // p.SplitStatus = Predictor.SplitStatusEnum.CanBeUsed; // if (nntLst.Count == 0) { // p.SplitStatus = Predictor.SplitStatusEnum.NotEnoughCases; // p.Gain = 0; // return 0; // } // for (i = 0; i < nntLst.Count - 1; ++i) { // //if(i==12 && p.Variable.Name=="apache_2") // // dfd=8; // lVar = Fcn.InfoTimesCount(left, n, p); // rVar = Fcn.InfoTimesCount(right, n, p); // if (i == 0) { // minVar = (lVar + rVar) / instanceCount; // n.DescendentImpPreCalculated[0] = lVar; // n.DescendentImpPreCalculated[1] = rVar; // } else { // var = (lVar + rVar) / instanceCount; // if (var < minVar) { // minVar = var; // bestPartitionSplitPoint = i; // n.DescendentImpPreCalculated[0] = lVar; // n.DescendentImpPreCalculated[1] = rVar; // leftBest.Clear(); // rightBest.Clear(); // foreach (string s in left) // leftBest.Add(s); // foreach (string s in right) // rightBest.Add(s); // } // } // left.Add(right[0]); // right.RemoveAt(0); // } // p.ChildrenGroups.ValueLst[0].Clear(); // p.ChildrenGroups.ValueLst[1].Clear(); // //Add left node values // for (i = 0; i <= bestPartitionSplitPoint; ++i) // p.ChildrenGroups.ValueLst[0].Add(p.CaseSd.IndexOfKey(nntLst[i].T)); // //Add right node values // for (i = bestPartitionSplitPoint + 1; i < nntLst.Count; ++i) // p.ChildrenGroups.ValueLst[1].Add(p.CaseSd.IndexOfKey(nntLst[i].T)); // p.Gain = (n.Imp - minVar) * 100 / n.Imp; // return p.Gain; //} 
////============================================================================================================
////============================================================================================================
/// <summary>
/// Searches the best binary threshold on the continuous predictor
/// <paramref name="p"/> for node <paramref name="n"/>. Every candidate index
/// returned by <c>Fcn.SetPossibleThresholdIndexLst</c> is scored with the
/// row-count weighted average of <c>Info</c> over the two sides; candidates
/// leaving fewer than <c>Def.TreeMinNumberOfCasesPerNode</c> rows on either
/// side, or scoring NaN, are ignored. The lowest score wins.
/// On success sets <c>p.SplitValue</c>, <c>p.SplitStatus</c>, <c>p.Gain</c>
/// and <c>n.DescendentImpPreCalculated</c>.
/// </summary>
/// <param name="n">Node whose rows are examined.</param>
/// <param name="p">Predictor to evaluate; updated in place.</param>
/// <returns>
/// <c>p.Gain</c>, or 0 (with <c>p.SplitStatus</c> set to
/// <c>NotEnoughCases</c>) when no usable threshold exists.
/// </returns>
public static double MinInfoCont(NodeTargetContinuous n, Predictor p) {
    int leftRows = 0, rightRows = 0;
    double bestImp = double.NaN;
    // Read here as the original did; the value is not referenced below.
    double instanceCount = n.Table.RowCount;

    string targetCol = Def.DbBsTb + "." + Def.Schema.Target.Name;
    string predictorCol = Def.DbBsTb + "." + p.Variable.Name;
    string nodeTable = Def.DbTrTb + n.Id;

    //Columns of each returned row:
    //AvsLst[i].N0 = y
    //AvsLst[i].N1 = Value of the dependent variable
    //AvsLst[i].N2 = Frequency of y
    //AvsLst[i].N3 = Total of distinct registries until that row
    string sql =
        @"SELECT ALL " + targetCol + ", " + predictorCol + ", " +
        " count(" + predictorCol + "), 0 " +
        "FROM " + Def.DbBsTb + "," + nodeTable + " " +
        "WHERE " + Def.DbBsTb + "." + Def.DbTableIdName + " = " + nodeTable + "." + Def.DbTableIdName + " " +
        " AND " + predictorCol + " IS NOT NULL " +
        "GROUP BY " + targetCol + ", " + predictorCol + " " +
        "ORDER BY " + predictorCol;
    List<N4> AvsLst = Def.Db.GetN4Lst(sql);

    //N3 becomes the running total of N2 up to and including each row
    for (int k = 0; k < AvsLst.Count; ++k) {
        AvsLst[k].N3 = (k == 0) ? AvsLst[0].N2 : AvsLst[k - 1].N3 + AvsLst[k].N2;
    }

    List<int> thresholdIndexLst = Fcn.SetPossibleThresholdIndexLst(AvsLst);
    if (thresholdIndexLst.Count == 0) {
        p.SplitStatus = Predictor.SplitStatusEnum.NotEnoughCases;
        p.Gain = 0;
        return 0;
    }

    n.DescendentImpPreCalculated = new List<double>(2);
    n.DescendentImpPreCalculated.Add(0);
    n.DescendentImpPreCalculated.Add(0);
    p.SplitStatus = Predictor.SplitStatusEnum.CanBeUsed;

    if (AvsLst.Count == 0) {
        p.SplitStatus = Predictor.SplitStatusEnum.NotEnoughCases;
        p.Gain = 0;
        return 0;
    }

    foreach (int threshold in thresholdIndexLst) {
        double leftImp = Info(0, threshold, AvsLst, out leftRows);
        double rightImp = Info(threshold + 1, AvsLst.Count - 1, AvsLst, out rightRows);
        double weightedImp = (leftRows * leftImp + rightRows * rightImp) / (leftRows + rightRows);

        bool bothSidesBigEnough =
            leftRows >= Def.TreeMinNumberOfCasesPerNode &&
            rightRows >= Def.TreeMinNumberOfCasesPerNode;

        // First valid candidate, or a strictly better one than the best so far.
        if (bothSidesBigEnough && !Double.IsNaN(weightedImp) &&
            (Double.IsNaN(bestImp) || weightedImp < bestImp)) {
            bestImp = weightedImp;
            p.SplitValue = AvsLst[threshold].N1;
            n.DescendentImpPreCalculated[0] = leftImp;
            n.DescendentImpPreCalculated[1] = rightImp;
        }
    }

    if (Double.IsNaN(bestImp)) {
        p.SplitStatus = Predictor.SplitStatusEnum.NotEnoughCases;
        p.Gain = 0;
        return 0;
    }

    // Relative impurity reduction in percent, scaled by the share of rows
    // that are not counted in p.NullCount.
    p.Gain = (n.Imp - bestImp) * 100 / n.Imp;
    p.Gain *= (double)(n.Table.RowCount - p.NullCount) / n.Table.RowCount;
    return p.Gain;
}
//Sets the best p.SplitValue p.Gain //public static double MinInfoContSQL(NodeTargetContinuous n, Predictor p) { // int i; // double l, r, minVar, var; // l = r = minVar = var = 0; // //Tries each partition: // //p.Gain = (n.Imp - minVar) * 100 / n.Imp; // List<double> vlLst; // List<double> NLst; // OdbcTransaction dbTrans = null; // dbTrans = Def.Db.Con.BeginTransaction(); // string sql = // @"SELECT DISTINCT " + // Def.DbBsTb + "." + p.Variable.Name + " " + // "FROM " + // Def.DbBsTb + "," + Def.DbTrTb + n.Id + " " + // "WHERE " + // Def.DbBsTb + "." + Def.DbTableIdName + " = " + // Def.DbTrTb + n.Id + "." + Def.DbTableIdName + " " + // "ORDER BY " + // Def.DbBsTb + "." + p.Variable.Name; // vlLst = Def.Db.GetNumberLst(sql, dbTrans); // p.SplitStatus = Predictor.SplitStatusEnum.CanBeUsed; // if (vlLst.Count == 0) { // p.SplitStatus = Predictor.SplitStatusEnum.NotEnoughCases; // p.Gain = 0; // return 0; // } // for (i = 0; i < vlLst.Count; ++i) { // sql = // @"SELECT // COALESCE(variance(" + n.Tree.Schema.Target.Name + "), 0), count(*) " + // "FROM " // + Def.DbBsTb + " , " + Def.DbTrTb + n.Id + " " + // "WHERE " // + Def.DbBsTb + "." + Def.DbTableIdName + "=" + // Def.DbTrTb + n.Id + "." + Def.DbTableIdName + " and " + // Def.DbBsTb + "." + p.Variable.Name + "<=" + vlLst[i]; // NLst = Def.Db.GetNumberRowLst(sql, dbTrans); // l = NLst[0] * NLst[1] / n.Table.RowCount; // sql = // @"SELECT // COALESCE(variance(" + n.Tree.Schema.Target.Name + "), 0), count(*) " + // "FROM " // + Def.DbBsTb + " , " + Def.DbTrTb + n.Id + " " + // "WHERE " // + Def.DbBsTb + "." + Def.DbTableIdName + "=" + // Def.DbTrTb + n.Id + "." + Def.DbTableIdName + " and " + // Def.DbBsTb + "." 
//                 + p.Variable.Name + ">" + vlLst[i];
//         NLst = Def.Db.GetNumberRowLst(sql, dbTrans);
//         r = NLst[0] * NLst[1] / n.Table.RowCount;
//         var = (l + r);
//         if (i == 0) {
//             minVar = var;
//             p.SplitValue = vlLst[i];
//         } else {
//             if (var < minVar) {
//                 minVar = var;
//                 p.SplitValue = vlLst[i];
//             }
//         }
//     }
//     dbTrans.Commit();
//     p.Gain = (n.Imp - minVar) * 100 / n.Imp; ;
//     return p.Gain;
//}
//============================================================================================================
//============================================================================================================
/// <summary>
/// Searches the best two-group partition of the categorical predictor
/// <paramref name="p"/> for node <paramref name="n"/>. The distinct predictor
/// values are ordered by the average of the target variable, and every prefix
/// of that ordering is scored with <c>PartitionInfo</c>; the prefix with the
/// lowest non-NaN score becomes the left group and the remaining values the
/// right group. On success fills <c>p.ChildrenGroups.ValueGroupLst[0]</c> and
/// <c>[1]</c> and sets <c>p.SplitStatus</c> and <c>p.Gain</c>.
/// </summary>
/// <param name="n">Node whose rows are examined.</param>
/// <param name="p">Predictor to evaluate; updated in place.</param>
/// <returns>
/// <c>p.Gain</c>, or 0 (with <c>p.SplitStatus</c> set to
/// <c>NotEnoughCases</c>) when no usable partition exists.
/// </returns>
//Used to be: public static double MinInfoCatHeuristic(NodeTargetContinuous n, Predictor p) {
public static double MinInfoCat(NodeTargetContinuous n, Predictor p) {
    int bestCut = 0;
    double bestInfo = double.NaN;
    // Read here as the original did; the value is not referenced below.
    double instanceCount = n.Table.RowCount;
    List<string> left = new List<string>();

    string predictorCol = Def.DbBsTb + "." + p.Variable.Name;
    string targetCol = Def.DbBsTb + "." + Def.Schema.Target.Name;
    string nodeTable = Def.DbTrTb + n.Id;

    // Join of the base table with this node's reference table, restricted to
    // rows where the predictor is not null; shared by both queries.
    string fromWhere =
        "FROM " + Def.DbBsTb + "," + nodeTable + " " +
        "WHERE " + Def.DbBsTb + "." + Def.DbTableIdName + " = " + nodeTable + "." + Def.DbTableIdName + " " +
        "AND " + predictorCol + " IS NOT NULL ";

    // Distinct predictor values ordered by the average target value.
    string sqlAverage =
        @"SELECT " + predictorCol + " " +
        fromWhere +
        "GROUP BY " + predictorCol + " " +
        "ORDER BY avg(" + targetCol + ")";
    List<string> valLst = Def.Db.GetTextLst(sqlAverage);
    if (valLst.Count == 0) {
        p.SplitStatus = Predictor.SplitStatusEnum.NotEnoughCases;
        p.Gain = 0;
        return 0;
    }

    // Row counts per (target value, predictor value) pair.
    string sqlDepVar =
        @"SELECT " + targetCol + ", " +
        "count(*), " + predictorCol + " " +
        fromWhere +
        "GROUP BY " + targetCol + ", " + predictorCol;
    List<NNT> DepIndepLst = Def.Db.GetNNTLst(sqlDepVar);

    p.SplitStatus = Predictor.SplitStatusEnum.CanBeUsed;

    // Grow the left group one value at a time and keep the best-scoring cut.
    left.Add(valLst[0]);
    for (int cut = 0; cut < valLst.Count - 1; ++cut) {
        double partitionInfo = PartitionInfo(left, DepIndepLst, n, p);
        if (!Double.IsNaN(partitionInfo) &&
            (Double.IsNaN(bestInfo) || partitionInfo < bestInfo)) {
            bestInfo = partitionInfo;
            bestCut = cut;
        }
        left.Add(valLst[cut + 1]);
    }

    if (Double.IsNaN(bestInfo)) {
        p.SplitStatus = Predictor.SplitStatusEnum.NotEnoughCases;
        p.Gain = 0;
        return 0;
    }

    p.ChildrenGroups.ValueGroupLst[0].Clear();
    p.ChildrenGroups.ValueGroupLst[1].Clear();

    // Values up to and including the best cut go left (group 0), the rest right (group 1).
    for (int k = 0; k < valLst.Count; ++k) {
        p.ChildrenGroups.ValueGroupLst[k <= bestCut ? 0 : 1].Add(valLst[k]);
    }

    // Relative impurity reduction in percent, scaled by the share of rows
    // that are not counted in p.NullCount.
    p.Gain = (n.Imp - bestInfo) * 100 / n.Imp;
    p.Gain *= (double)(n.Table.RowCount - p.NullCount) / n.Table.RowCount;
    return p.Gain;
}