private void ProcessData(
            string dependentFeatureName,
            Matrix<double> trainingData,
            IList<TPredictionResult> expectedValues,
            IList<string> dataColumnNames,
            ConcurrentDictionary<int, TPredictionResult> partialResults,
            IKnnAdditionalParams knnAdditionalParams)
        {
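            // Leave-one-out: in parallel, hold out each training row, fit a k-NN model on the remaining rows and predict the held-out row.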
            Parallel.For(0, trainingData.RowCount, rowIdx =>
            {
                var trainingDataExceptRow = trainingData.RemoveRow(rowIdx);

                var expectedValuesExceptRow = expectedValues.ToList();
                expectedValuesExceptRow.RemoveAt(rowIdx);

                var queryMatrix        = Matrix<double>.Build.DenseOfRowVectors(trainingData.Row(rowIdx));
                var queryDataFrame     = new DataFrame(queryMatrix);
                var knnPredictionModel = new KnnPredictionModel<TPredictionResult>(trainingDataExceptRow, expectedValuesExceptRow,
                                                                                    dataColumnNames, knnAdditionalParams.KNeighbors, knnAdditionalParams.UseWeightedDistances);
                var results = _knnPredictor.Predict(
                    queryDataFrame,
                    knnPredictionModel,
                    dependentFeatureName);
                TPredictionResult result = results.First();
                partialResults.AddOrUpdate(rowIdx, result, (i, d) => result);
            });
        }

        private double ProcessDataAndQuantifyErrorRate(
            string dependentFeatureName,
            Matrix<double> trainingData,
            IList<TPredictionResult> expectedValues,
            IList<string> dataColumnsNames,
            IKnnAdditionalParams knnAdditionalParams)
        {
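            // Collect the leave-one-out predictions and compare them, in row order, against the expected values.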
            var partialResults = new ConcurrentDictionary<int, TPredictionResult>();

            ProcessData(dependentFeatureName, trainingData, expectedValues, dataColumnsNames, partialResults, knnAdditionalParams);
            var actualValues = partialResults.OrderBy(kvp => kvp.Key).Select(kvp => kvp.Value).ToList();

            return _errorMeasure.CalculateError(expectedValues, actualValues);
        }

        protected IBackwardsEliminationKnnModel<TPredictionResult> PerformBackwardsElimination(IDataFrame dataFrame, string dependentFeatureName, IKnnAdditionalParams additionalParams)
        {
            Tuple<Matrix<double>, IList<TPredictionResult>, IList<string>> preparedData = PrepareTrainingData(dataFrame, dependentFeatureName);
            var dataColumnsNames = preparedData.Item3;
            var trainingData     = _dataNormalizer.NormalizeColumns(preparedData.Item1);
            var expectedValues   = preparedData.Item2;

            // TODO: refactor this - ugly!
            _knnPredictor.NormalizeNumericValues = false;
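            // Error rate of the full feature set; every candidate removal is judged against this baseline.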
            double baseErrorRate = ProcessDataAndQuantifyErrorRate(
                dependentFeatureName,
                trainingData,
                expectedValues,
                dataColumnsNames,
                additionalParams);

            var actualDataColumnNames            = new List<string>(dataColumnsNames);
            var anyFeatureRemovedInThisIteration = true;
            var removedFeaturesInfo = new List<IBackwardsEliminationRemovedFeatureData>();

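            // Greedy backwards elimination: keep dropping single features as long as doing so does not increase the error.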
            while (anyFeatureRemovedInThisIteration)
            {
                anyFeatureRemovedInThisIteration = false;
                var candidateFeaturesToEliminate = new Dictionary<int, double>();
                foreach (var columnIdx in Enumerable.Range(0, actualDataColumnNames.Count))
                {
                    var newFeatureNames = new List<string>(actualDataColumnNames);
                    newFeatureNames.RemoveAt(columnIdx);

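                    // Leave-one-out error with this single column excluded.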
                    var trainingDataWithoutColumn = trainingData.RemoveColumn(columnIdx);
                    var newDataPredictionError    = ProcessDataAndQuantifyErrorRate(
                        dependentFeatureName,
                        trainingDataWithoutColumn,
                        expectedValues,
                        newFeatureNames,
                        additionalParams);
                    if (newDataPredictionError <= baseErrorRate)
                    {
                        var errorGain = baseErrorRate - newDataPredictionError;
                        candidateFeaturesToEliminate.Add(columnIdx, errorGain);
                    }
                }
                if (!candidateFeaturesToEliminate.Any())
                {
                    break;
                }
                // Remove the feature whose elimination yields the largest error improvement.
                var bestFeatureToRemove = candidateFeaturesToEliminate.OrderByDescending(kvp => kvp.Value).First();
                anyFeatureRemovedInThisIteration = true;
                removedFeaturesInfo.Add(new BackwardsEliminationRemovedFeatureData(bestFeatureToRemove.Value, actualDataColumnNames[bestFeatureToRemove.Key]));
                actualDataColumnNames.RemoveAt(bestFeatureToRemove.Key);
                // Drop the column from the training matrix as well, so column indices stay aligned with actualDataColumnNames.
                trainingData = trainingData.RemoveColumn(bestFeatureToRemove.Key);
                // The new baseline is the previous error minus the gain obtained by removing this feature.
                baseErrorRate -= bestFeatureToRemove.Value;
            }

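            // The returned model keeps the full training data and column names, plus the list of features that were eliminated.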
            return new BackwardsEliminationKnnModel<TPredictionResult>(
                       preparedData.Item1,
                       expectedValues,
                       dataColumnsNames,
                       additionalParams.KNeighbors,
                       additionalParams.UseWeightedDistances,
                       removedFeaturesInfo);
        }