/// <summary>
/// Assigns to every entry of <c>DecisionColumns</c> its information gain
/// (<c>TotalEntropy</c> minus the column's own <c>Entropy</c>) and its
/// position within <c>DecisionColumns</c>.
/// </summary>
private void CalculateColumnInformationGain()
{
    // The loop counter already is the column's position, so there is no need to
    // search the list with IndexOf for every column (which was quadratic and would
    // report the first match's position for entries that compare equal).
    for (var columnIndex = 0; columnIndex < DecisionColumns.Count; columnIndex++)
    {
        var decisionColumn = DecisionColumns[columnIndex];
        decisionColumn.InformationGain = TotalEntropy - decisionColumn.Entropy;
        decisionColumn.ColumnIndex = columnIndex;
    }
}
/// <summary>
/// Runs one round of branch expansion. For every column listed in
/// <paramref name="bestOneIndexes"/> and every attribute of that column, the columns
/// that are not yet in <c>AlreadyUsedIndexes</c> are scored by their case entropy over
/// the attribute's rows. The lowest-scoring column is queued for the next round together
/// with the path that leads to it; when no unused column is left, the attribute is
/// reported as a "Tak!"/"Nie!" leaf instead. Progress is written to the console.
/// The method calls itself with the queued columns while more than one was queued.
/// </summary>
/// <param name="bestOneIndexes">Indexes of the columns to expand in this round.</param>
/// <param name="fullHierarchy">
/// Paths belonging to <paramref name="bestOneIndexes"/>, paired by position, or
/// <c>null</c>. Each path produced here is the parent path followed by the position of
/// the attribute inside the expanded column and the index of the column chosen next.
/// </param>
private void DoUnknownThing(List<int> bestOneIndexes, List<List<int>> fullHierarchy)
{
    var index = -1;
    var selectedIndexesForNextRound = new List<int>();
    var newHierarchy = new List<List<int>>();
    var locallyUsedColumnIndexes = new List<int>();

    foreach (var bestOneIndex in bestOneIndexes)
    {
        index++;
        var bestOne = DecisionColumns.First(col => col.ColumnIndex == bestOneIndex);

        // Remember the column so that later lookups skip it.
        AlreadyUsedIndexes.Add((int)bestOne.ColumnIndex);
        Console.WriteLine($"Columnd index {bestOne.ColumnIndex}");

        // For every attribute of the best column (e.g. for "weather": sunny, cloudy, ...).
        foreach (var attribute in bestOne.Attributes)
        {
            // All row numbers in which this attribute occurs.
            var attributeRows = attribute.NegativeRowNumbers
                .Concat(attribute.PositiveRowNumbers)
                .ToList();

            // Materialized once: the deferred query used to be re-evaluated by the
            // loop, by Any() and by MinBy().
            var rejectedColumns = DecisionColumns
                .Where(column => AlreadyUsedIndexes.Contains((int)column.ColumnIndex));
            var candidateColumns = DecisionColumns.Except(rejectedColumns).ToList();

            // Score every column that has not been used yet.
            foreach (var decisionColumn in candidateColumns)
            {
                decisionColumn.ColumnIndex = DecisionColumns.IndexOf(decisionColumn);

                // Attributes of the candidate that occur in the rows of 'attribute'.
                var innerAttributes = decisionColumn.Attributes
                    .Where(innerAttribute =>
                        innerAttribute.PositiveRowNumbers.Intersect(attributeRows).Any() ||
                        innerAttribute.NegativeRowNumbers.Intersect(attributeRows).Any())
                    .ToList();

                var hasNegativeRowInScope = innerAttributes
                    .SelectMany(innerAttr => innerAttr.NegativeRowNumbers)
                    .Any(neg => attributeRows.Contains(neg));

                if (hasNegativeRowInScope)
                {
                    decisionColumn.CaseEntropy = 0;
                    foreach (var innerAttribute in innerAttributes)
                    {
                        var positiveCount = innerAttribute.PositiveRowNumbers
                            .Count(row => attributeRows.Contains(row));
                        var negativeCount = innerAttribute.NegativeRowNumbers
                            .Count(row => attributeRows.Contains(row));
                        var totalCount = negativeCount + positiveCount;

                        var positiveEntropy = positiveCount != 0
                            ? MathExtension.GetBinaryLogaritm(positiveCount, totalCount)
                            : 0d;
                        var negativeEntropy = negativeCount != 0
                            ? MathExtension.GetBinaryLogaritm(negativeCount, totalCount)
                            : 0d;

                        // Weight the inner attribute by its share of the attribute's rows.
                        decisionColumn.CaseEntropy +=
                            (double)totalCount / attributeRows.Count * (-positiveEntropy - negativeEntropy);
                    }
                }
                else
                {
                    // -1 marks "no negative row in scope"; the checks below test for > -1.
                    decisionColumn.CaseEntropy = -1;
                }

                // NOTE(review): every scored candidate - not only the selected one - is
                // marked as used once the round ends. Confirm this is intended.
                locallyUsedColumnIndexes.Add(Convert.ToInt32(decisionColumn.ColumnIndex));
            }

            Console.WriteLine("Nazwa atrybutu" + attribute.Value);
            string attributeValue = attribute.Value;

            var hasPositives = attribute.PositiveRowNumbers.Count > 0;
            var hasNegatives = attribute.NegativeRowNumbers.Count > 0;

            if (candidateColumns.Count == 0)
            {
                // No unused column left: report the leaf from the rows of the current path.
                // NOTE(review): assumes fullHierarchy holds a path at this position;
                // a null or shorter list throws here, exactly as before.
                var pathRows = GetRowsFromPath(fullHierarchy[index]);
                var anyPositive = attribute.PositiveRowNumbers.Intersect(pathRows.PositiveRowNumbers).Any();
                var anyNegative = attribute.NegativeRowNumbers.Intersect(pathRows.NegativeRowNumbers).Any();

                if (!anyPositive && anyNegative)
                {
                    Console.WriteLine("Nie!");
                }
                else if (anyPositive && !anyNegative)
                {
                    Console.WriteLine("Tak!");
                }

                continue;
            }

            var nextColumn = candidateColumns.MinBy(column => column.CaseEntropy);
            int? selectedColumnIndex = null;

            if (!hasPositives && hasNegatives)
            {
                Console.WriteLine("NOPE NOPE NOPE");
            }
            else if (nextColumn.CaseEntropy > -1)
            {
                Console.WriteLine("Index kolumny >>>> " + nextColumn.ColumnIndex);
                selectedColumnIndex = Convert.ToInt32(nextColumn.ColumnIndex);
                selectedIndexesForNextRound.Add(selectedColumnIndex.Value);
            }
            else if (hasPositives && !hasNegatives)
            {
                // The former "Nie" check in this branch repeated the first condition
                // above and could never be reached, so only "Tak" remains.
                Console.WriteLine("Tak");
            }

            // A path is appended only when a column was queued for this attribute, so
            // that newHierarchy stays paired by position with selectedIndexesForNextRound
            // (the recursive call reads both with the same index).
            // NOTE(review): an attribute value of "-1" still queues a column without a
            // path; the meaning of that sentinel is not visible here.
            if (selectedColumnIndex.HasValue && attributeValue != "-1")
            {
                var currentBranchHierarchy = new List<int>();
                if (fullHierarchy?[index] != null)
                {
                    currentBranchHierarchy.AddRange(fullHierarchy[index]);
                }
                else
                {
                    currentBranchHierarchy.Add(bestOneIndexes.First());
                }

                var upperColumn = DecisionColumns[bestOneIndex];
                currentBranchHierarchy.Add(upperColumn.Attributes.FindIndex(atr => atr.Value == attributeValue));

                // Record the column chosen for THIS attribute. Indexing the selection
                // list with the outer-loop position picked an unrelated entry, or threw
                // when nothing had been selected yet.
                currentBranchHierarchy.Add(selectedColumnIndex.Value);
                newHierarchy.Add(currentBranchHierarchy);
            }
        }

        // TODO: cleanup here?
        Console.WriteLine("----------------------");
    }

    AlreadyUsedIndexes.AddRange(locallyUsedColumnIndexes);

    if (selectedIndexesForNextRound.Count > 1)
    {
        DoUnknownThing(selectedIndexesForNextRound, newHierarchy);
    }
}