/// <summary>
/// Recursively builds a decision-tree node for <paramref name="dataset"/>.
/// For every column that is both in <c>_remainingColumns</c> and in
/// <c>_columnsInConsideration</c>, each combination of result bucket and value
/// range is scored with <c>CalculateEntropie</c>; the combination with the lowest
/// entropie that is still greater than <c>_stopEntropie</c> becomes the node.
/// </summary>
/// <param name="evaluatedParent">
/// Predicates already used further up the tree. Candidates contained in this list are
/// skipped, and the predicate chosen for the new node is appended to it.
/// </param>
/// <param name="dataset">The dataset the node is trained on.</param>
/// <returns>The created node including its children, or <c>null</c> when no candidate qualified.</returns>
private TreeNode CreateNode(List<EvaluatedColumnPredicate> evaluatedParent, DrugDataset dataset)
{
    double bestEntropie = double.MaxValue;
    TreeNode bestNode = null;
    EvaluatedColumnPredicate bestEvaluation = new EvaluatedColumnPredicate();

    // Loop-invariant work, hoisted out of the nested loops below.
    // NOTE(review): assumes GetTrainingRows returns the same rows for the same
    // column/ratio on every call - confirm before relying on the hoisting.
    var decimalLiteral = new Regex(@"^[\d]+\.[\d]+$");
    var results = dataset.GetTrainingRows("Nicotine", _trainingRatio)
        .Select(c => double.Parse(c, CultureInfo.InvariantCulture))
        .ToList();

    // The lower bound of every value range is fixed at zero; only the upper bound varies.
    const string minExp = "0.0";
    const double minBound = 0.0;

    foreach (var column in _remainingColumns)
    {
        if (!_columnsInConsideration.Contains(column))
        {
            continue;
        }

        // Parse with the invariant culture: the bounds below are formatted with it,
        // so parsing with the current culture would misread '.' on comma-decimal systems.
        var values = dataset.GetTrainingRows(column, _trainingRatio)
            .Select(c => double.Parse(c, CultureInfo.InvariantCulture))
            .ToList();
        var valuePredicateRange = DeterminePredicateRange(values);

        for (int i = 0; i < _resultPredicateRange.Item1; i++)
        {
            // Copy the bucket bounds into per-iteration locals so the predicate does
            // not capture the mutable loop counter.
            var resultLower = i * _resultPredicateRange.Item2;
            var resultUpper = (i + 1) * _resultPredicateRange.Item2;
            var resultPredicate = new Predicate<double>(d => d >= resultLower && d < resultUpper);

            for (int j = 0; j < valuePredicateRange.Item1; j++)
            {
                var maxExp = (valuePredicateRange.Item1 * valuePredicateRange.Item2
                              - ((j + 1) * valuePredicateRange.Item2)).ToString(CultureInfo.InvariantCulture);

                // Force a "digits.digits" literal for the filter expression.
                if (!decimalLiteral.IsMatch(maxExp))
                {
                    maxExp += ".0";
                }

                // Values that do not survive the normalisation above (e.g. exponent
                // notation or a leading sign) yield an unparsable literal and are skipped.
                double maxBound;
                if (!double.TryParse(maxExp, NumberStyles.Float | NumberStyles.AllowThousands,
                                     CultureInfo.InvariantCulture, out maxBound))
                {
                    continue;
                }

                // Compare against the parsed bounds instead of re-parsing on every call.
                var valuePredicate = new Predicate<double>(d => d >= minBound && d < maxBound);
                var trueFilterExpression = $"{column} >= {minExp} AND {column} < {maxExp}";
                var falseFilterExpression = $"NOT ({column} >= {minExp} AND {column} < {maxExp})";

                var evaluation = new EvaluatedColumnPredicate
                {
                    Column = column,
                    PredicateDescription = trueFilterExpression
                };
                if (evaluatedParent.Contains(evaluation))
                {
                    continue;
                }

                var entropie = CalculateEntropie(results, values, valuePredicate, resultPredicate);
                if (entropie < 0 || entropie > 1)
                {
                    Logger.LogError($"Calculated entropie on column {column} with predicate {trueFilterExpression} is not valid => {entropie}");
                }

                if (entropie < bestEntropie && entropie > _stopEntropie)
                {
                    bestEvaluation = evaluation;
                    bestEntropie = entropie;
                    bestNode = new TreeNode
                    {
                        ChildFalse = null,
                        ChildTrue = null,
                        Column = column,
                        Predicate = valuePredicate,
                        PredicateMaxExp = maxExp,
                        PredicateMinExp = minExp,
                        TrueFilterExpression = trueFilterExpression,
                        FalseFilterExpression = falseFilterExpression,
                        Result = resultLower
                    };
                }

                // Sanity check on the result bucket width.
                // NOTE(review): the 0.16..0.17 window looks like a hard-coded expectation
                // for one specific dataset - confirm whether it should stay.
                if (_resultPredicateRange.Item2 < 0.16 || _resultPredicateRange.Item2 > 0.17)
                {
                    Logger.LogError("Worng result");
                }
            }
        }
    }

    if (bestNode == null)
    {
        // Nothing qualified: leave evaluatedParent untouched instead of adding an empty placeholder.
        return bestNode;
    }

    evaluatedParent.Add(bestEvaluation);
    Logger.LogMessage($"Treenode created with column {bestNode.Column}, entropie = {bestEntropie}, predicate = {bestNode.TrueFilterExpression}, result = {bestNode.Result}");

    // NOTE(review): ChildFalse is built from the TRUE filter and ChildTrue from the FALSE
    // filter. This is only correct if CreateFilteredDataset removes the matching rows;
    // verify against its implementation - the pairing is kept exactly as it was.
    bestNode.ChildFalse = CreateNode(evaluatedParent, CreateFilteredDataset(dataset, bestNode.TrueFilterExpression));
    bestNode.ChildTrue = CreateNode(evaluatedParent, CreateFilteredDataset(dataset, bestNode.FalseFilterExpression));

    return bestNode;
}
/// <summary>
/// Reads the training rows of the "Nicotine" column from <c>_dataset</c>, parses them as
/// doubles and stores the range returned by <c>DeterminePredicateRange</c> in
/// <c>_resultPredicateRange</c>.
/// </summary>
private void InitResult()
{
    // Parse with the invariant culture so '.' is always read as the decimal separator,
    // independent of the machine's regional settings.
    var results = _dataset.GetTrainingRows("Nicotine", _trainingRatio)
        .Select(c => double.Parse(c, CultureInfo.InvariantCulture))
        .ToList();

    _resultPredicateRange = DeterminePredicateRange(results);
}