////////////////////////////////////////////////////////////////////// public KNN(DrugDataset dataset) { //_dataset = dataset; //_dataset.Columns.ForEach(c => { // var rows =_dataset.GetRows(c); //}); //var list = _dataset.GetRows("Nicotine"); //_dataset.CleanAllColumns(); //var listDoubles = list.Select(d => double.Parse(d)).ToList(); //Start ----------------------------------------------------------- KNN examplekNN = KNN.initialiseKNN(3, "DataSet.txt"); //List<double> instance2Classify = new List<double> { 12, 11, 500 }; //foreach (DataRow row in DrugDataTable.Rows) //{ // tempList.Add(row[columnName].ToString()); //} List <double> instance2Classify = new List <double> { 12, 11, 500 }; string result = examplekNN.Classify(instance2Classify); //----------------------------------------------------------------- }
/// <summary>
/// Creates the KNN form for the given dataset and fills the column check list
/// with the dataset's column names in ascending order.
/// </summary>
/// <param name="drugDataset">Dataset stored in <c>_drugDataset</c>; its <c>Columns</c> populate the check list.</param>
public FormKnn(DrugDataset drugDataset)
{
    InitializeComponent();
    _drugDataset = drugDataset;

    // Items are added unchecked; the user picks the columns to consider.
    foreach (var columnName in _drugDataset.Columns.OrderBy(name => name).ToList())
    {
        checkedListBox1.Items.Add(columnName);
    }
}
/// <summary>
/// Loads the evaluation file named in <c>txtEvaluationFile</c>, builds a raw and a
/// normalized dataset from its comma-separated lines, and runs the decision-tree
/// prediction on them using the current <c>_tree</c>.
/// </summary>
/// <param name="sender">Control that raised the click event (unused).</param>
/// <param name="e">Event data (unused).</param>
private void btnEvaluateFile_Click(object sender, EventArgs e)
{
    var evaluationPath = txtEvaluationFile.Text;
    if (!File.Exists(evaluationPath))
    {
        MessageBox.Show("Chemin invalide");
        return;
    }

    // Read the file once, then give each dataset its own copy of every row so
    // that changes made through one dataset cannot leak into the other.
    var rawLines = File.ReadAllLines(evaluationPath).Select(x => x.Split(',')).ToList();
    var lines = rawLines.Select(fields => (string[])fields.Clone()).ToList();
    Logger.LogMessage($"All lines were read from evaluation file {evaluationPath}");

    var rawDataset = new DrugDataset();
    var normalizedDataset = new DrugDataset();
    try
    {
        rawDataset.CreateDataset(rawLines);
        normalizedDataset.CreateDataset(lines);
    }
    catch (Exception ex)
    {
        Logger.LogError($"Error happened while creating Dataset: {ex.Message}");

        // Predicting on a dataset that failed to build would only produce
        // follow-up errors or meaningless results, so stop here.
        return;
    }

    // Only the normalized copy is cleaned; the raw copy is passed through untouched.
    normalizedDataset.CleanAllColumns();

    var prediction = new DecisionTreePrediction(rawDataset, normalizedDataset, _tree);
    prediction.Predict();
}
/// <summary>
/// Stores the configuration used to build a decision tree.
/// </summary>
/// <param name="dataset">Dataset the tree is built from.</param>
/// <param name="columnsInConsideration">Names of the columns that may be used by the tree.</param>
/// <param name="stopEntropie">Entropy threshold stored in <c>_stopEntropie</c>.</param>
/// <param name="trainingRatio">Training ratio stored in <c>_trainingRatio</c>.</param>
public DecisionTree(DrugDataset dataset, List<string> columnsInConsideration, double stopEntropie, TrainingRatio trainingRatio)
{
    _dataset = dataset;
    _columnsInConsideration = columnsInConsideration;
    _stopEntropie = stopEntropie;
    _trainingRatio = trainingRatio;

    // Independent copy of the column list, so it is a separate list object
    // from the dataset's own Columns collection.
    _remainingColumns = dataset.Columns.ToList();
}
/// <summary>
/// Creates the data form for the given file: stores the path, creates an empty
/// <c>DrugDataset</c>, then calls <c>CreateDataset</c> and <c>FillCombobox</c>.
/// </summary>
/// <param name="file">Path stored in <c>_csvFile</c>.</param>
public FormData(string file)
{
    InitializeComponent();

    _drugDataset = new DrugDataset();
    _csvFile = file;

    // Both fields are set before these helpers run.
    CreateDataset();
    FillCombobox();
}
/// <summary>
/// Creates the tree-builder form: lists the dataset's columns (sorted ascending)
/// with every entry checked, sets the epsilon text box to zero and selects
/// the <c>rb31</c> radio button.
/// </summary>
/// <param name="drugDataset">Dataset stored in <c>_drugDataset</c>; its <c>Columns</c> populate the check list.</param>
public FormTreeBuilder(DrugDataset drugDataset)
{
    _drugDataset = drugDataset;
    InitializeComponent();

    checkedListBox1.Items.Clear();
    foreach (var columnName in _drugDataset.Columns.OrderBy(name => name).ToList())
    {
        checkedListBox1.Items.Add(columnName);
    }

    // Every column starts out selected.
    var index = 0;
    while (index < checkedListBox1.Items.Count)
    {
        checkedListBox1.SetItemChecked(index, true);
        index++;
    }

    txtEpsilon.Text = 0.ToString();
    rb31.Checked = true;
}
/// <summary>
/// Stores the KNN configuration, cleans both datasets and immediately runs
/// <c>Predict</c>, <c>ShowResult</c> and <c>Interpret</c>, in that order.
/// </summary>
/// <param name="drugDataset">Dataset stored in <c>_drugDataset</c>.</param>
/// <param name="columnsInConsideration">Names of the columns to take into account.</param>
/// <param name="predictionDataset">Dataset stored in <c>_predictionDataset</c>.</param>
/// <param name="neighborsInCondideration">Number of neighbours to take into account.</param>
/// <param name="interpretation">Interpretation mode stored in <c>_interpretation</c>.</param>
public KNN2(DrugDataset drugDataset, List<string> columnsInConsideration, DrugDataset predictionDataset, int neighborsInCondideration, KNNInterpretation interpretation)
{
    // Configuration
    _drugDataset = drugDataset;
    _columnsInConsideration = columnsInConsideration;
    _predictionDataset = predictionDataset;
    _neighborsInCondideration = neighborsInCondideration;
    _interpretation = interpretation;

    // Clean the prediction dataset first, then the stored drug dataset.
    predictionDataset.CleanAllColumns();
    drugDataset.CleanAllColumns();

    // The whole pipeline runs from the constructor.
    Predict();
    ShowResult();
    Interpret();
}
/// <summary>
/// Builds a new dataset containing only the rows of <paramref name="fromDataset"/>
/// that match <paramref name="filterExpression"/>.
/// </summary>
/// <param name="fromDataset">Dataset whose <c>DrugDataTable</c> is filtered.</param>
/// <param name="filterExpression">Filter passed to <c>DrugDataTable.Select</c>.</param>
/// <returns>A new <c>DrugDataset</c> created from a header row followed by the matching rows.</returns>
private DrugDataset CreateFilteredDataset(DrugDataset fromDataset, string filterExpression)
{
    var matchingRows = fromDataset.DrugDataTable.Select(filterExpression);

    // NOTE(review): the header row comes from _dataset, not from fromDataset;
    // presumably both always share the same columns - confirm.
    var rows = new List<string[]> { _dataset.Columns.ToArray() };
    rows.AddRange(matchingRows.Select(row => row.ItemArray.Select(cell => cell.ToString()).ToArray()));

    var filtered = new DrugDataset();
    filtered.CreateDataset(rows);
    return filtered;
}
/// <summary>
/// Validates the form input (evaluation file, neighbour count, selected columns),
/// loads the evaluation file into a dataset and starts a <c>KNN2</c> run with the
/// interpretation selected by the radio buttons.
/// </summary>
/// <param name="sender">Control that raised the click event (unused).</param>
/// <param name="e">Event data (unused).</param>
private void btnEvaluateFile_Click(object sender, EventArgs e)
{
    var evaluationPath = txtEvaluationFile.Text;
    if (!File.Exists(evaluationPath))
    {
        MessageBox.Show("Chemin invalide");
        return;
    }

    int neighborInConsideration;
    if (!int.TryParse(txtNbNeighbor.Text, out neighborInConsideration) || neighborInConsideration <= 0)
    {
        MessageBox.Show("Le nombre de voisin à prendre en considération est invalide");
        return;
    }

    // Only fully checked entries count (an indeterminate state is ignored).
    var columnsInConsideration = new List<string>();
    for (int i = 0; i < checkedListBox1.Items.Count; i++)
    {
        if (checkedListBox1.GetItemCheckState(i) == CheckState.Checked)
        {
            columnsInConsideration.Add(checkedListBox1.Items[i].ToString());
        }
    }

    // The Nicotine column is rejected as an input column.
    if (columnsInConsideration.Contains("Nicotine"))
    {
        MessageBox.Show("Les colonnes en considération ne peuvent pas contenir la colonne Nicotine");
        return;
    }

    var lines = File.ReadAllLines(evaluationPath).Select(x => x.Split(',')).ToList();
    Logger.LogMessage($"All lines were read from evaluation file {evaluationPath}");

    var dataset = new DrugDataset();
    try
    {
        dataset.CreateDataset(lines);
    }
    catch (Exception ex)
    {
        Logger.LogError($"Error happened while creating Dataset: {ex.Message}");

        // Running KNN on a dataset that failed to build would only produce
        // follow-up errors or meaningless results, so stop here.
        return;
    }

    // Mode is the default; the median button takes precedence over the others.
    KNNInterpretation interpretation = KNNInterpretation.Mode;
    if (rbMediane.Checked)
    {
        interpretation = KNNInterpretation.Median;
    }
    else if (!rbMode.Checked && rbMoyenne.Checked)
    {
        interpretation = KNNInterpretation.Mean;
    }

    Logger.BringToFront();
    _knn2 = new KNN2(_drugDataset, columnsInConsideration, dataset, neighborInConsideration, interpretation);
}
/// <summary>
/// Stores the inputs of a decision-tree prediction; no work is done here.
/// </summary>
/// <param name="rawDataset">Dataset stored in <c>_rawDataset</c>.</param>
/// <param name="normalizedDataset">Dataset stored in <c>_normalizedDataset</c>.</param>
/// <param name="tree">Tree stored in <c>_tree</c>.</param>
public DecisionTreePrediction(DrugDataset rawDataset, DrugDataset normalizedDataset, SavingTree tree)
{
    _tree = tree;
    _normalizedDataset = normalizedDataset;
    _rawDataset = rawDataset;
}
/// <summary>
/// Searches every considered column for the combination of value range and result
/// range with the lowest entropy that is still above <c>_stopEntropie</c>, builds a
/// node for it and recursively builds its two children on the filtered datasets.
/// </summary>
/// <param name="evaluatedParent">
/// Predicates already used; combinations found in this list are skipped. The list is
/// shared with the recursive calls and receives this call's chosen predicate
/// (or a default instance when nothing qualified).
/// </param>
/// <param name="dataset">Dataset the candidate predicates are evaluated on.</param>
/// <returns>The created node, or <c>null</c> when no combination qualified.</returns>
private TreeNode CreateNode(List<EvaluatedColumnPredicate> evaluatedParent, DrugDataset dataset)
{
    TreeNode bestNode = null;
    double bestEntropie = double.MaxValue;
    EvaluatedColumnPredicate evaluationCandidate = new EvaluatedColumnPredicate();

    // Built once per call instead of once per candidate.
    var decimalLiteral = new Regex(@"^[\d]+\.[\d]+$");

    // Loaded on first use and then reused: the Nicotine results depend only on
    // the dataset and training ratio, not on the candidate being evaluated.
    List<double> results = null;

    foreach (var column in _remainingColumns)
    {
        if (!_columnsInConsideration.Contains(column))
        {
            continue;
        }

        var values = dataset.GetTrainingRows(column, _trainingRatio).Select(c => double.Parse(c)).ToList();
        var valuePredicateRange = DeterminePredicateRange(values);

        for (int i = 0; i < _resultPredicateRange.Item1; i++)
        {
            // Per-iteration copies so the lambda does not capture the shared loop variable.
            double resultMin = i * _resultPredicateRange.Item2;
            double resultMax = (i + 1) * _resultPredicateRange.Item2;
            var resultPredicate = new Predicate<double>(d => d >= resultMin && d < resultMax);

            for (int j = 0; j < valuePredicateRange.Item1; j++)
            {
                // The lower bound is always zero; the upper bound shrinks by one step per j.
                var minExp = 0.ToString();
                var maxExp = (valuePredicateRange.Item1 * valuePredicateRange.Item2 - ((j + 1) * valuePredicateRange.Item2)).ToString(CultureInfo.InvariantCulture);

                // The bounds are embedded in filter expressions, so force a decimal literal.
                if (!decimalLiteral.IsMatch(minExp))
                {
                    minExp += ".0";
                }
                if (!decimalLiteral.IsMatch(maxExp))
                {
                    maxExp += ".0";
                }

                // Both bounds were formatted with the invariant culture, so they must be
                // parsed with it too. Parse once here rather than on every predicate call.
                // A bound that does not parse (e.g. exponent notation with ".0" appended) is skipped.
                double maxValue;
                if (!double.TryParse(maxExp, NumberStyles.Float | NumberStyles.AllowThousands, CultureInfo.InvariantCulture, out maxValue))
                {
                    continue;
                }
                double minValue = double.Parse(minExp, CultureInfo.InvariantCulture);

                var valuePredicate = new Predicate<double>(d => d >= minValue && d < maxValue);
                var trueFilterExpression = $"{column} >= {minExp} AND {column} < {maxExp}";
                var falseFilterExpression = $"NOT ({column} >= {minExp} AND {column} < {maxExp})";

                var evaluation = new EvaluatedColumnPredicate
                {
                    Column = column,
                    PredicateDescription = trueFilterExpression
                };
                if (evaluatedParent.Contains(evaluation))
                {
                    continue;
                }

                if (results == null)
                {
                    results = dataset.GetTrainingRows("Nicotine", _trainingRatio).Select(c => double.Parse(c)).ToList();
                }

                var entropie = CalculateEntropie(results, values, valuePredicate, resultPredicate);
                if (entropie < 0 || entropie > 1)
                {
                    Logger.LogError($"Calculated entropie on column {column} with predicate {trueFilterExpression} is not valid => {entropie}");
                }

                // Keep the lowest entropy seen so far that is still above the stop threshold.
                if (entropie < bestEntropie && entropie > _stopEntropie)
                {
                    evaluationCandidate = evaluation;
                    bestEntropie = entropie;
                    bestNode = new TreeNode
                    {
                        ChildFalse = null,
                        ChildTrue = null,
                        Column = column,
                        Predicate = valuePredicate,
                        PredicateMaxExp = maxExp,
                        PredicateMinExp = minExp,
                        TrueFilterExpression = trueFilterExpression,
                        FalseFilterExpression = falseFilterExpression,
                        Result = resultMin
                    };
                }

                // Sanity check on the result step width (expected between 0.16 and 0.17).
                if (_resultPredicateRange.Item2 < 0.16 || _resultPredicateRange.Item2 > 0.17)
                {
                    Logger.LogError("Worng result");
                }
            }
        }
    }

    evaluatedParent.Add(evaluationCandidate);

    if (bestNode != null)
    {
        Logger.LogMessage($"Treenode created with column {bestNode.Column}, entropie = {bestEntropie}, predicate = {bestNode.TrueFilterExpression}, result = {bestNode.Result}");

        // NOTE(review): ChildFalse is built from the rows matching the TRUE filter and
        // ChildTrue from the rows matching the FALSE filter. This looks swapped, but the
        // code that walks the tree is not visible here - confirm before changing.
        bestNode.ChildFalse = CreateNode(evaluatedParent, CreateFilteredDataset(dataset, bestNode.TrueFilterExpression));
        bestNode.ChildTrue = CreateNode(evaluatedParent, CreateFilteredDataset(dataset, bestNode.FalseFilterExpression));
    }

    return bestNode;
}