Exemplo n.º 1
0
        //////////////////////////////////////////////////////////////////////

        /// <summary>
        /// Runs a self-contained classification example: builds a classifier from
        /// "DataSet.txt" through <c>KNN.initialiseKNN</c> and classifies one hard-coded instance.
        /// </summary>
        /// <param name="dataset">Currently not used by this constructor.</param>
        public KNN(DrugDataset dataset)
        {
            // Example classifier; it is built from the text file, not from the supplied dataset.
            var exampleClassifier = KNN.initialiseKNN(3, "DataSet.txt");

            // Hard-coded sample instance.
            var instance2Classify = new List <double>();
            instance2Classify.Add(12);
            instance2Classify.Add(11);
            instance2Classify.Add(500);

            // The predicted label is not consumed anywhere yet.
            exampleClassifier.Classify(instance2Classify);
        }
Exemplo n.º 2
0
        /// <summary>
        /// Creates the KNN form for the given dataset and fills the checked list box
        /// with the dataset's column names in sorted order.
        /// </summary>
        /// <param name="drugDataset">Dataset kept by the form and used as the source of column names.</param>
        public FormKnn(DrugDataset drugDataset)
        {
            InitializeComponent();

            _drugDataset = drugDataset;

            foreach (var columnName in _drugDataset.Columns.OrderBy(name => name))
            {
                checkedListBox1.Items.Add(columnName);
            }
        }
Exemplo n.º 3
0
        /// <summary>
        /// Click handler: loads the evaluation file into a raw and a normalized dataset,
        /// cleans the normalized one and runs the decision-tree prediction with <c>_tree</c>.
        /// </summary>
        /// <param name="sender">Control that raised the event.</param>
        /// <param name="e">Event data (unused).</param>
        private void btnEvaluateFile_Click(object sender, EventArgs e)
        {
            var evaluationFile = txtEvaluationFile.Text;

            if (!File.Exists(evaluationFile))
            {
                MessageBox.Show("Chemin invalide");
                return;
            }

            var rawDataset        = new DrugDataset();
            var normalizedDataset = new DrugDataset();

            // Read the file once; each dataset still gets its own independently split rows
            // so that neither can observe changes made through the other.
            var fileLines = File.ReadAllLines(evaluationFile);
            var rawLines  = fileLines.Select(x => x.Split(',')).ToList();
            var lines     = fileLines.Select(x => x.Split(',')).ToList();

            Logger.LogMessage($"All lines were read from evaluation file {evaluationFile}");

            try
            {
                rawDataset.CreateDataset(rawLines);
                normalizedDataset.CreateDataset(lines);
            }
            catch (Exception ex)
            {
                Logger.LogError($"Error happened while creating Dataset: {ex.Message}");

                // Do not predict on datasets that could not be built.
                return;
            }

            normalizedDataset.CleanAllColumns();

            DecisionTreePrediction prediction = new DecisionTreePrediction(rawDataset, normalizedDataset, _tree);

            prediction.Predict();
        }
Exemplo n.º 4
0
 /// <summary>
 /// Stores the training configuration of the tree and takes a private copy of the
 /// dataset's column list as the initial set of remaining columns.
 /// </summary>
 /// <param name="dataset">Dataset the tree is built from.</param>
 /// <param name="columnsInConsideration">Columns allowed to be used by the tree.</param>
 /// <param name="stopEntropie">Entropy threshold stored for the build step.</param>
 /// <param name="trainingRatio">Training ratio stored for the build step.</param>
 public DecisionTree(DrugDataset dataset, List <string> columnsInConsideration, double stopEntropie, TrainingRatio trainingRatio)
 {
     _dataset                = dataset;
     _trainingRatio          = trainingRatio;
     _stopEntropie           = stopEntropie;
     _columnsInConsideration = columnsInConsideration;

     // Copy, so later changes to this list do not touch the dataset's own column list.
     _remainingColumns = _dataset.Columns.ToList();
 }
Exemplo n.º 5
0
        /// <summary>
        /// Creates the data form for a CSV file: remembers the file, creates an empty
        /// dataset, then calls <c>CreateDataset</c> and <c>FillCombobox</c>.
        /// </summary>
        /// <param name="file">Value stored in <c>_csvFile</c>.</param>
        public FormData(string file)
        {
            InitializeComponent();

            // State must be in place before the two setup steps below run.
            _drugDataset = new DrugDataset();
            _csvFile     = file;

            CreateDataset();
            FillCombobox();
        }
Exemplo n.º 6
0
        /// <summary>
        /// Creates the tree-builder form: lists the dataset's column names in sorted order,
        /// checks all of them, and presets the epsilon text box and the <c>rb31</c> radio button.
        /// </summary>
        /// <param name="drugDataset">Dataset kept by the form and used as the source of column names.</param>
        public FormTreeBuilder(DrugDataset drugDataset)
        {
            _drugDataset = drugDataset;
            InitializeComponent();

            // Rebuild the list from scratch.
            checkedListBox1.Items.Clear();
            foreach (var columnName in _drugDataset.Columns.OrderBy(name => name))
            {
                checkedListBox1.Items.Add(columnName);
            }

            // Every column starts out checked.
            var index = 0;
            while (index < checkedListBox1.Items.Count)
            {
                checkedListBox1.SetItemChecked(index, true);
                index++;
            }

            // Initial values of the remaining controls.
            txtEpsilon.Text = 0.ToString();
            rb31.Checked    = true;
        }
Exemplo n.º 7
0
        /// <summary>
        /// Stores the configuration, cleans both datasets and immediately runs the whole
        /// pipeline: <c>Predict</c>, <c>ShowResult</c>, then <c>Interpret</c>.
        /// </summary>
        /// <param name="drugDataset">Reference dataset; its columns are cleaned here.</param>
        /// <param name="columnsInConsideration">Column names stored for the prediction step.</param>
        /// <param name="predictionDataset">Dataset to predict on; its columns are cleaned here.</param>
        /// <param name="neighborsInCondideration">Neighbor count stored for the prediction step.</param>
        /// <param name="interpretation">Interpretation mode stored for the interpretation step.</param>
        public KNN2(DrugDataset drugDataset, List <string> columnsInConsideration, DrugDataset predictionDataset,
                    int neighborsInCondideration, KNNInterpretation interpretation)
        {
            _interpretation           = interpretation;
            _neighborsInCondideration = neighborsInCondideration;
            _columnsInConsideration   = columnsInConsideration;
            _predictionDataset        = predictionDataset;
            _drugDataset              = drugDataset;

            // Clean the prediction dataset first, then the reference dataset.
            predictionDataset.CleanAllColumns();
            drugDataset.CleanAllColumns();

            // The constructor performs the complete run.
            Predict();
            ShowResult();
            Interpret();
        }
Exemplo n.º 8
0
        /// <summary>
        /// Builds a new dataset that contains only the rows of <paramref name="fromDataset"/>
        /// matching <paramref name="filterExpression"/>.
        /// </summary>
        /// <param name="fromDataset">Dataset whose data table is filtered.</param>
        /// <param name="filterExpression">Filter passed to <c>DrugDataTable.Select</c>.</param>
        /// <returns>A new <c>DrugDataset</c> created from a header row followed by the matching rows.</returns>
        private DrugDataset CreateFilteredDataset(DrugDataset fromDataset, string filterExpression)
        {
            var dataRows = fromDataset.DrugDataTable.Select(filterExpression);

            var rows = new List <string[]>();

            // The header must describe the dataset the rows come from, so it is taken from
            // the source dataset rather than from the _dataset field.
            rows.Add(fromDataset.Columns.ToArray());

            foreach (var dataRow in dataRows)
            {
                rows.Add(dataRow.ItemArray.Select(i => i.ToString()).ToArray());
            }

            var newDataset = new DrugDataset();

            newDataset.CreateDataset(rows);

            return(newDataset);
        }
Exemplo n.º 9
0
        /// <summary>
        /// Click handler: validates the form input, loads the evaluation file into a dataset
        /// and starts a <c>KNN2</c> run with the selected columns, neighbor count and interpretation.
        /// </summary>
        /// <param name="sender">Control that raised the event.</param>
        /// <param name="e">Event data (unused).</param>
        private void btnEvaluateFile_Click(object sender, EventArgs e)
        {
            var evaluationFile = txtEvaluationFile.Text;

            if (!File.Exists(evaluationFile))
            {
                MessageBox.Show("Chemin invalide");
                return;
            }

            int neighborInConsideration = 0;

            if (!int.TryParse(txtNbNeighbor.Text, out neighborInConsideration) || neighborInConsideration <= 0)
            {
                MessageBox.Show("Le nombre de voisin à prendre en considération est invalide");
                return;
            }

            // Only fully checked items count; items in the indeterminate state are ignored.
            var columnsInConsideration = new List <string>();

            for (int i = 0; i < checkedListBox1.Items.Count; i++)
            {
                if (checkedListBox1.GetItemCheckState(i) == CheckState.Checked)
                {
                    columnsInConsideration.Add(checkedListBox1.Items[i].ToString());
                }
            }

            if (columnsInConsideration.Contains("Nicotine"))
            {
                MessageBox.Show("Les colonnes en considération ne peuvent pas contenir la colonne Nicotine");
                return;
            }

            var dataset = new DrugDataset();

            var lines = File.ReadAllLines(evaluationFile).Select(x => x.Split(',')).ToList();

            Logger.LogMessage($"All lines were read from evaluation file {evaluationFile}");

            try
            {
                dataset.CreateDataset(lines);
            }
            catch (Exception ex)
            {
                Logger.LogError($"Error happened while creating Dataset: {ex.Message}");

                // Do not run the prediction on a dataset that could not be built.
                return;
            }

            // Mode is the fallback when none of the radio buttons is checked.
            KNNInterpretation interpretation = KNNInterpretation.Mode;

            if (rbMediane.Checked)
            {
                interpretation = KNNInterpretation.Median;
            }
            else if (rbMode.Checked)
            {
                interpretation = KNNInterpretation.Mode;
            }
            else if (rbMoyenne.Checked)
            {
                interpretation = KNNInterpretation.Mean;
            }

            Logger.BringToFront();
            _knn2 = new KNN2(_drugDataset, columnsInConsideration, dataset, neighborInConsideration, interpretation);
        }
 /// <summary>
 /// Stores the two datasets and the tree that the prediction works with.
 /// </summary>
 /// <param name="rawDataset">Dataset stored in <c>_rawDataset</c>.</param>
 /// <param name="normalizedDataset">Dataset stored in <c>_normalizedDataset</c>.</param>
 /// <param name="tree">Tree stored in <c>_tree</c>.</param>
 public DecisionTreePrediction(DrugDataset rawDataset, DrugDataset normalizedDataset, SavingTree tree)
 {
     _tree              = tree;
     _normalizedDataset = normalizedDataset;
     _rawDataset        = rawDataset;
 }
Exemplo n.º 11
0
        /// <summary>
        /// Recursively builds a tree node for <paramref name="dataset"/>. For every remaining column
        /// that is under consideration, each candidate range predicate <c>[0, upper)</c> is evaluated
        /// against each result range; the candidate with the lowest entropy that is still above
        /// <c>_stopEntropie</c> becomes the node, and its children are built from the filtered datasets.
        /// </summary>
        /// <param name="evaluatedParent">
        /// Predicates already used on the path so far. The list is shared with the recursive calls and
        /// the selected candidate (or an empty one when nothing was selected) is appended to it.
        /// </param>
        /// <param name="dataset">Dataset whose training rows are evaluated.</param>
        /// <returns>The created node, or <c>null</c> when no candidate qualified.</returns>
        private TreeNode CreateNode(List <EvaluatedColumnPredicate> evaluatedParent, DrugDataset dataset)
        {
            double bestEntropie = double.MaxValue;
            TreeNode bestNode = null;
            EvaluatedColumnPredicate evaluationCandidate = new EvaluatedColumnPredicate();

            // Loop-invariant helpers, created once instead of once per candidate.
            var decimalLiteral = new Regex(@"^[\d]+\.[\d]+$");

            // Parsed "Nicotine" training rows; loaded on first use and reused afterwards.
            // Assumes GetTrainingRows returns the same rows for the same dataset and ratio — TODO confirm.
            List <double> results = null;

            foreach (var column in _remainingColumns)
            {
                if (!_columnsInConsideration.Contains(column))
                {
                    continue;
                }

                var values = dataset.GetTrainingRows(column, _trainingRatio).Select(c => double.Parse(c)).ToList();
                var valuePredicateRange = DeterminePredicateRange(values);

                for (int i = 0; i < _resultPredicateRange.Item1; i++)
                {
                    for (int j = 0; j < valuePredicateRange.Item1; j++)
                    {
                        var resultPredicate = new Predicate <double>(d => d >= i * _resultPredicateRange.Item2 && d < (i + 1) * _resultPredicateRange.Item2);

                        // Candidate range is [0, upper); the upper bound shrinks as j grows.
                        var minExp = 0.ToString();
                        var maxExp = (valuePredicateRange.Item1 * valuePredicateRange.Item2 - ((j + 1) * valuePredicateRange.Item2)).ToString(CultureInfo.InvariantCulture);

                        // Force a "digits.digits" literal for the filter expression.
                        if (!decimalLiteral.IsMatch(minExp))
                        {
                            minExp += ".0";
                        }

                        if (!decimalLiteral.IsMatch(maxExp))
                        {
                            maxExp += ".0";
                        }

                        // The bounds were formatted with the invariant culture, so they are parsed back
                        // with it as well; parsing them with the current culture rejects or misreads
                        // them on machines whose decimal separator is not '.'.
                        // Bounds that are no longer valid numbers after the ".0" suffix are skipped.
                        double maxValue = 0;
                        if (!double.TryParse(maxExp, NumberStyles.Float | NumberStyles.AllowThousands, CultureInfo.InvariantCulture, out maxValue))
                        {
                            continue;
                        }

                        double minValue = double.Parse(minExp, CultureInfo.InvariantCulture);

                        var valuePredicate = new Predicate <double>(d => d >= minValue && d < maxValue);

                        var trueFilterExpression  = $"{column} >= {minExp} AND {column} < {maxExp}";
                        var falseFilterExpression = $"NOT ({column} >= {minExp} AND {column} < {maxExp})";

                        var evaluation = new EvaluatedColumnPredicate {
                            Column = column, PredicateDescription = trueFilterExpression
                        };

                        // Never reuse a predicate that was already applied on this path.
                        if (evaluatedParent.Contains(evaluation))
                        {
                            continue;
                        }

                        if (results == null)
                        {
                            results = dataset.GetTrainingRows("Nicotine", _trainingRatio).Select(c => double.Parse(c)).ToList();
                        }

                        var entropie = CalculateEntropie(results, values, valuePredicate, resultPredicate);

                        if (entropie < 0 || entropie > 1)
                        {
                            Logger.LogError($"Calculated entropie on column {column} with predicate {trueFilterExpression} is not valid => {entropie}");
                        }

                        // Keep the lowest entropy seen so far that is still above the stop threshold.
                        if (entropie < bestEntropie && entropie > _stopEntropie)
                        {
                            evaluationCandidate = evaluation;
                            bestEntropie        = entropie;
                            bestNode            = new TreeNode
                            {
                                ChildFalse            = null,
                                ChildTrue             = null,
                                Column                = column,
                                Predicate             = valuePredicate,
                                PredicateMaxExp       = maxExp,
                                PredicateMinExp       = minExp,
                                TrueFilterExpression  = trueFilterExpression,
                                FalseFilterExpression = falseFilterExpression,
                                Result                = i * _resultPredicateRange.Item2
                            };
                        }

                        // Sanity check on the width of the result ranges.
                        if (_resultPredicateRange.Item2 < 0.16 || _resultPredicateRange.Item2 > 0.17)
                        {
                            Logger.LogError("Worng result");
                        }
                    }
                }
            }

            evaluatedParent.Add(evaluationCandidate);

            if (bestNode != null)
            {
                Logger.LogMessage($"Treenode created with column {bestNode.Column}, entropie = {bestEntropie}, predicate = {bestNode.TrueFilterExpression}, result = {bestNode.Result}");

                // NOTE(review): ChildFalse is built from the rows matching the TRUE filter and ChildTrue
                // from the rows matching the FALSE filter. This looks inverted, but it is left unchanged
                // because the code that walks the tree is not visible here — confirm before swapping.
                bestNode.ChildFalse = CreateNode(evaluatedParent, CreateFilteredDataset(dataset, bestNode.TrueFilterExpression));
                bestNode.ChildTrue  = CreateNode(evaluatedParent, CreateFilteredDataset(dataset, bestNode.FalseFilterExpression));
            }

            return(bestNode);
        }