예제 #1
0
        /// <summary>
        /// Recursively builds a tree node for <paramref name="dataset"/>. Every column that is both in
        /// <c>_remainingColumns</c> and <c>_columnsInConsideration</c> is scanned with range predicates of the
        /// form <c>0.0 &lt;= value &lt; max</c>; the candidate with the lowest entropy that is still above
        /// <c>_stopEntropie</c> becomes the node, and its two children are built from the filtered datasets.
        /// </summary>
        /// <param name="evaluatedParent">
        /// Predicates already evaluated; candidates contained in this list are skipped. The list is mutated:
        /// the selected candidate (or a default instance when none was selected) is appended.
        /// </param>
        /// <param name="dataset">Dataset whose training rows are used to evaluate the candidates.</param>
        /// <returns>The created node, or <c>null</c> when no candidate qualified.</returns>
        private TreeNode CreateNode(List<EvaluatedColumnPredicate> evaluatedParent, DrugDataset dataset)
        {
            // Built once per call instead of once per inner-loop iteration.
            var decimalFormat = new Regex(@"^[\d]+\.[\d]+$");

            // Same styles double.Parse/TryParse use by default; only the culture is pinned.
            const NumberStyles parseStyles = NumberStyles.Float | NumberStyles.AllowThousands;

            // The lower bound is always zero; "0.0" is what the original normalisation of "0" produced.
            const string minExp = "0.0";
            const double minValue = 0.0;

            TreeNode bestNode = null;
            double bestEntropie = double.MaxValue;
            EvaluatedColumnPredicate evaluationCandidate = new EvaluatedColumnPredicate();

            // Result column values, loaded lazily on first use and reused for every candidate.
            // NOTE(review): assumes GetTrainingRows is deterministic for a given dataset/ratio and that
            // CalculateEntropie does not modify the list - confirm.
            List<double> results = null;

            foreach (var column in _remainingColumns)
            {
                if (!_columnsInConsideration.Contains(column))
                {
                    continue;
                }

                // NOTE(review): parsed with the current culture, as before; if the dataset stores
                // invariant-formatted numbers this should use CultureInfo.InvariantCulture - confirm.
                var values = dataset.GetTrainingRows(column, _trainingRatio).Select(c => double.Parse(c)).ToList();
                var valuePredicateRange = DeterminePredicateRange(values);

                for (int i = 0; i < _resultPredicateRange.Item1; i++)
                {
                    for (int j = 0; j < valuePredicateRange.Item1; j++)
                    {
                        // Only used synchronously below, so capturing the loop variable i is safe.
                        var resultPredicate = new Predicate<double>(d => d >= i * _resultPredicateRange.Item2 && d < (i + 1) * _resultPredicateRange.Item2);

                        var maxExp = (valuePredicateRange.Item1 * valuePredicateRange.Item2 - ((j + 1) * valuePredicateRange.Item2)).ToString(CultureInfo.InvariantCulture);

                        // Filter expressions need a decimal point in the literal.
                        if (!decimalFormat.IsMatch(maxExp))
                        {
                            maxExp += ".0";
                        }

                        // maxExp was formatted with the invariant culture, so it must be parsed with it too;
                        // parsing with the current culture misreads "1.5" on comma-decimal systems.
                        double maxValue;
                        if (!double.TryParse(maxExp, parseStyles, CultureInfo.InvariantCulture, out maxValue))
                        {
                            continue;
                        }

                        // Capture the parsed bound instead of re-parsing the strings on every predicate call.
                        var valuePredicate = new Predicate<double>(d => d >= minValue && d < maxValue);

                        var trueFilterExpression  = $"{column} >= {minExp} AND {column} < {maxExp}";
                        var falseFilterExpression = $"NOT ({column} >= {minExp} AND {column} < {maxExp})";

                        var evaluation = new EvaluatedColumnPredicate {
                            Column = column, PredicateDescription = trueFilterExpression
                        };

                        if (evaluatedParent.Contains(evaluation))
                        {
                            continue;
                        }

                        if (results == null)
                        {
                            results = dataset.GetTrainingRows("Nicotine", _trainingRatio).Select(c => double.Parse(c)).ToList();
                        }

                        var entropie = CalculateEntropie(results, values, valuePredicate, resultPredicate);

                        // NOTE(review): an out-of-range entropy is only logged; it still takes part in the
                        // candidate comparison below, exactly as before.
                        if (entropie < 0 || entropie > 1)
                        {
                            Logger.LogError($"Calculated entropie on column {column} with predicate {trueFilterExpression} is not valid => {entropie}");
                        }

                        if (entropie < bestEntropie && entropie > _stopEntropie)
                        {
                            evaluationCandidate = evaluation;
                            bestEntropie        = entropie;
                            bestNode            = new TreeNode
                            {
                                ChildFalse            = null,
                                ChildTrue             = null,
                                Column                = column,
                                Predicate             = valuePredicate,
                                PredicateMaxExp       = maxExp,
                                PredicateMinExp       = minExp,
                                TrueFilterExpression  = trueFilterExpression,
                                FalseFilterExpression = falseFilterExpression,
                                Result                = i * _resultPredicateRange.Item2
                            };
                        }

                        // Sanity check on the result step width, logged once per evaluated candidate.
                        if (_resultPredicateRange.Item2 < 0.16 || _resultPredicateRange.Item2 > 0.17)
                        {
                            Logger.LogError("Worng result");
                        }
                    }
                }
            }

            // NOTE(review): this also appends the default instance when no candidate was selected - confirm
            // whether that is intended.
            evaluatedParent.Add(evaluationCandidate);

            if (bestNode != null)
            {
                Logger.LogMessage($"Treenode created with column {bestNode.Column}, entropie = {bestEntropie}, predicate = {bestNode.TrueFilterExpression}, result = {bestNode.Result}");

                // NOTE(review): ChildFalse is built from the true-filter and ChildTrue from the false-filter;
                // kept as in the original - verify against CreateFilteredDataset and the tree traversal.
                bestNode.ChildFalse = CreateNode(evaluatedParent, CreateFilteredDataset(dataset, bestNode.TrueFilterExpression));
                bestNode.ChildTrue  = CreateNode(evaluatedParent, CreateFilteredDataset(dataset, bestNode.FalseFilterExpression));
            }

            return bestNode;
        }
예제 #2
0
        /// <summary>
        /// Parses the training rows of the "Nicotine" column of <c>_dataset</c> as doubles and stores the
        /// predicate range that <c>DeterminePredicateRange</c> computes for them in <c>_resultPredicateRange</c>.
        /// </summary>
        private void InitResult()
        {
            // NOTE(review): values are parsed with the current culture - confirm this matches the data format.
            _resultPredicateRange = DeterminePredicateRange(
                (from rawValue in _dataset.GetTrainingRows("Nicotine", _trainingRatio)
                 select double.Parse(rawValue)).ToList());
        }