Ejemplo n.º 1
0
        /// <summary>
        /// Collects the indices of all columns whose variance lies below the given threshold.
        /// </summary>
        /// <remarks>
        /// Only columns reported as <c>double</c> or <c>DateTime</c> are examined. For <c>DateTime</c>
        /// columns the tick count of the variance is divided by <c>TimeSpan.TicksPerSecond</c> before
        /// it is compared against the threshold.
        /// </remarks>
        /// <param name="variance">Exclusive upper bound the column variance is compared against.</param>
        /// <returns>The matching column indices, in ascending order.</returns>
        public List<int> ColumnsWithVarianceSmaller(double variance)
        {
            var matches = new List<int>();

            for (int col = 0; col < PreprocessingData.Columns; col++)
            {
                // Stays null for column types that are neither double nor DateTime.
                double? colVariance = null;

                if (PreprocessingData.VariableHasType<double>(col))
                {
                    colVariance = PreprocessingData.GetVariance<double>(col);
                }
                else if (PreprocessingData.VariableHasType<DateTime>(col))
                {
                    colVariance = (double)PreprocessingData.GetVariance<DateTime>(col).Ticks / TimeSpan.TicksPerSecond;
                }

                if (colVariance.HasValue && colVariance.Value < variance)
                {
                    matches.Add(col);
                }
            }

            return matches;
        }
Ejemplo n.º 2
0
#pragma warning restore 0067
        #endregion

        #region Manipulations
        /// <summary>
        /// Overwrites the listed cells of every column with a single value produced by the
        /// aggregator that matches the column's type.
        /// </summary>
        /// <param name="cells">Maps a column index to the indices of the cells to overwrite in that column.</param>
        /// <param name="doubleAggregator">Produces the replacement for a <c>double</c> column; <c>null</c> skips such columns.</param>
        /// <param name="dateTimeAggregator">Produces the replacement for a <c>DateTime</c> column; <c>null</c> skips such columns.</param>
        /// <param name="stringAggregator">Produces the replacement for a <c>string</c> column; <c>null</c> skips such columns.</param>
        private void ReplaceIndicesByValue(IDictionary<int, IList<int>> cells, Func<int, double> doubleAggregator = null,
                                           Func<int, DateTime> dateTimeAggregator = null, Func<int, string> stringAggregator = null)
        {
            PreprocessingData.InTransaction(() =>
            {
                foreach (KeyValuePair<int, IList<int>> entry in cells)
                {
                    int columnIndex = entry.Key;
                    IList<int> targetIndices = entry.Value;

                    // The type checks run in the order double, DateTime, string; the first match wins.
                    if (doubleAggregator != null && PreprocessingData.VariableHasType<double>(columnIndex))
                    {
                        double replacement = doubleAggregator(columnIndex);
                        foreach (int cellIndex in targetIndices)
                        {
                            PreprocessingData.SetCell<double>(columnIndex, cellIndex, replacement);
                        }
                        continue;
                    }

                    if (dateTimeAggregator != null && PreprocessingData.VariableHasType<DateTime>(columnIndex))
                    {
                        DateTime replacement = dateTimeAggregator(columnIndex);
                        foreach (int cellIndex in targetIndices)
                        {
                            PreprocessingData.SetCell<DateTime>(columnIndex, cellIndex, replacement);
                        }
                        continue;
                    }

                    if (stringAggregator != null && PreprocessingData.VariableHasType<string>(columnIndex))
                    {
                        string replacement = stringAggregator(columnIndex);
                        foreach (int cellIndex in targetIndices)
                        {
                            PreprocessingData.SetCell<string>(columnIndex, cellIndex, replacement);
                        }
                    }
                }
            });
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Randomly reorders the rows, either over the whole data set or inside each partition separately.
        /// </summary>
        /// <param name="shuffleRangesSeparately">
        /// When <c>true</c>, rows are permuted only within the test partition and then within the
        /// training partition; when <c>false</c>, all rows are permuted together.
        /// </param>
        public void Shuffle(bool shuffleRangesSeparately)
        {
            var rng = new FastRandom();

            if (!shuffleRangesSeparately)
            {
                PreprocessingData.InTransaction(() =>
                {
                    var permutation = Enumerable.Range(0, PreprocessingData.Rows).ToArray();
                    permutation.ShuffleInPlace(rng);
                    ReOrderToIndices(permutation);
                });
                return;
            }

            var partitions = new[] { PreprocessingData.TestPartition, PreprocessingData.TrainingPartition };
            PreprocessingData.InTransaction(() =>
            {
                // One reordering pass per partition, test partition first.
                foreach (IntRange partition in partitions)
                {
                    // Start from the identity mapping so rows outside the partition keep their position.
                    var permutation = Enumerable.Range(0, PreprocessingData.Rows).ToArray();
                    var shuffledPart = Enumerable.Range(partition.Start, partition.Size).Shuffle(rng).ToArray();

                    int source = 0;
                    for (int target = partition.Start; target < partition.End; target++)
                    {
                        permutation[target] = shuffledPart[source];
                        source++;
                    }

                    ReOrderToIndices(permutation);
                }
            });
        }
Ejemplo n.º 4
0
 /// <summary>
 /// Overwrites the listed cells of every column with a sequence of values produced by the
 /// aggregator that matches the column's type.
 /// </summary>
 /// <remarks>
 /// Cell indices and replacement values are paired positionally with <c>Zip</c>, so pairing stops
 /// as soon as either sequence runs out.
 /// </remarks>
 /// <param name="cells">Maps a column index to the indices of the cells to overwrite in that column.</param>
 /// <param name="doubleAggregator">Produces replacements for a <c>double</c> column; <c>null</c> skips such columns.</param>
 /// <param name="dateTimeAggregator">Produces replacements for a <c>DateTime</c> column; <c>null</c> skips such columns.</param>
 /// <param name="stringAggregator">Produces replacements for a <c>string</c> column; <c>null</c> skips such columns.</param>
 private void ReplaceIndicesByValues(IDictionary<int, IList<int>> cells, Func<int, IEnumerable<double>> doubleAggregator = null,
                                     Func<int, IEnumerable<DateTime>> dateTimeAggregator = null, Func<int, IEnumerable<string>> stringAggregator = null)
 {
     PreprocessingData.InTransaction(() =>
     {
         foreach (KeyValuePair<int, IList<int>> entry in cells)
         {
             int columnIndex = entry.Key;
             IList<int> targetIndices = entry.Value;

             // The type checks run in the order double, DateTime, string; the first match wins.
             if (doubleAggregator != null && PreprocessingData.VariableHasType<double>(columnIndex))
             {
                 IEnumerable<double> replacements = doubleAggregator(columnIndex);
                 foreach (var assignment in targetIndices.Zip(replacements, (row, value) => new KeyValuePair<int, double>(row, value)))
                 {
                     PreprocessingData.SetCell<double>(columnIndex, assignment.Key, assignment.Value);
                 }
                 continue;
             }

             if (dateTimeAggregator != null && PreprocessingData.VariableHasType<DateTime>(columnIndex))
             {
                 IEnumerable<DateTime> replacements = dateTimeAggregator(columnIndex);
                 foreach (var assignment in targetIndices.Zip(replacements, (row, value) => new KeyValuePair<int, DateTime>(row, value)))
                 {
                     PreprocessingData.SetCell<DateTime>(columnIndex, assignment.Key, assignment.Value);
                 }
                 continue;
             }

             if (stringAggregator != null && PreprocessingData.VariableHasType<string>(columnIndex))
             {
                 IEnumerable<string> replacements = stringAggregator(columnIndex);
                 foreach (var assignment in targetIndices.Zip(replacements, (row, value) => new KeyValuePair<int, string>(row, value)))
                 {
                     PreprocessingData.SetCell<string>(columnIndex, assignment.Key, assignment.Value);
                 }
             }
         }
     });
 }
Ejemplo n.º 5
0
 /// <summary>
 /// Replaces the listed cells of each column with the value returned by
 /// <c>PreprocessingData.GetMode</c> for that column.
 /// </summary>
 /// <param name="cells">Maps a column index to the indices of the cells to overwrite in that column.</param>
 /// <param name="considerSelection">Forwarded unchanged to <c>PreprocessingData.GetMode</c>.</param>
 public void ReplaceIndicesByMode(IDictionary<int, IList<int>> cells, bool considerSelection = false)
 {
     // One mode provider per supported column type; all of them forward considerSelection.
     Func<int, double> doubleMode = columnIndex => PreprocessingData.GetMode<double>(columnIndex, considerSelection);
     Func<int, DateTime> dateTimeMode = columnIndex => PreprocessingData.GetMode<DateTime>(columnIndex, considerSelection);
     Func<int, string> stringMode = columnIndex => PreprocessingData.GetMode<string>(columnIndex, considerSelection);

     ReplaceIndicesByValue(cells, doubleMode, dateTimeMode, stringMode);
 }
Ejemplo n.º 6
0
        /// <summary>
        /// Fills the cells from <paramref name="prevIndex"/> up to (but not including)
        /// <paramref name="nextIndex"/> with values linearly interpolated between the two boundary cells.
        /// </summary>
        /// <remarks>
        /// The cell at <paramref name="prevIndex"/> is rewritten with its own value (offset zero); the
        /// cell at <paramref name="nextIndex"/> is only read. Columns that are neither <c>double</c>
        /// nor <c>DateTime</c> are left untouched. <c>DateTime</c> columns are interpolated in seconds.
        /// </remarks>
        /// <param name="column">Entry whose key identifies the column to interpolate; its value is not read here.</param>
        /// <param name="prevIndex">Index of the cell that supplies the start value.</param>
        /// <param name="nextIndex">Index of the cell that supplies the end value.</param>
        private void Interpolate(KeyValuePair<int, IList<int>> column, int prevIndex, int nextIndex)
        {
            int columnIndex = column.Key;
            int span = nextIndex - prevIndex;

            if (PreprocessingData.VariableHasType<double>(columnIndex))
            {
                double first = PreprocessingData.GetCell<double>(columnIndex, prevIndex);
                double last = PreprocessingData.GetCell<double>(columnIndex, nextIndex);
                double delta = (last - first) / span;

                int offset = 0;
                for (int row = prevIndex; row < nextIndex; row++, offset++)
                {
                    PreprocessingData.SetCell<double>(columnIndex, row, first + (delta * offset));
                }
                return;
            }

            if (PreprocessingData.VariableHasType<DateTime>(columnIndex))
            {
                DateTime first = PreprocessingData.GetCell<DateTime>(columnIndex, prevIndex);
                DateTime last = PreprocessingData.GetCell<DateTime>(columnIndex, nextIndex);
                double deltaSeconds = (last - first).TotalSeconds / span;

                int offset = 0;
                for (int row = prevIndex; row < nextIndex; row++, offset++)
                {
                    PreprocessingData.SetCell<DateTime>(columnIndex, row, first.AddSeconds(deltaSeconds * offset));
                }
            }
        }
Ejemplo n.º 7
0
 /// <summary>
 /// Deletes the given columns inside a single <c>PreprocessingData.InTransaction</c> call.
 /// </summary>
 /// <param name="columns">Indices of the columns to delete; the list itself is not modified.</param>
 private void DeleteColumns(List<int> columns)
 {
     PreprocessingData.InTransaction(() =>
     {
         // Highest index first - presumably so that a deletion cannot shift the indices still pending.
         IEnumerable<int> highestFirst = columns.OrderByDescending(columnIndex => columnIndex);
         foreach (int columnIndex in highestFirst)
         {
             PreprocessingData.DeleteColumn(columnIndex);
         }
     });
 }
Ejemplo n.º 8
0
 /// <summary>
 /// Deletes the given rows inside a single <c>PreprocessingData.InTransaction</c> call.
 /// </summary>
 /// <param name="rows">Indices of the rows to delete; the list itself is not modified.</param>
 private void DeleteRows(List<int> rows)
 {
     PreprocessingData.InTransaction(() =>
     {
         // Highest index first - presumably so that a deletion cannot shift the indices still pending.
         IEnumerable<int> highestFirst = rows.OrderByDescending(rowIndex => rowIndex);
         foreach (int rowIndex in highestFirst)
         {
             PreprocessingData.DeleteRow(rowIndex);
         }
     });
 }
Ejemplo n.º 9
0
 /// <summary>
 /// Evaluates the usable filters and installs their combined result as the current filter.
 /// </summary>
 /// <remarks>
 /// A filter is used only when it is <c>Active</c> and its <c>ConstraintData</c> is not <c>null</c>.
 /// When no filter qualifies, <c>PreprocessingData.SetFilter</c> is not called.
 /// </remarks>
 /// <param name="filters">Candidate filters.</param>
 /// <param name="isAndCombination">Forwarded to <c>GetActiveFilterResult</c>.</param>
 /// <returns>
 /// The combined filter result, or <c>CreateBoolArray(PreprocessingData.Rows, false)</c> when no
 /// filter qualifies.
 /// </returns>
 public bool[] Preview(IList<IFilter> filters, bool isAndCombination) {
   IList<IFilter> usableFilters = filters
     .Where(filter => filter.Active && filter.ConstraintData != null)
     .ToList<IFilter>();

   if (usableFilters.Count == 0) {
     return CreateBoolArray(PreprocessingData.Rows, false);
   }

   var combined = GetActiveFilterResult(usableFilters, isAndCombination);
   PreprocessingData.SetFilter(combined);
   return combined;
 }
Ejemplo n.º 10
0
        /// <summary>
        /// Walks backwards from the cell just before <paramref name="start"/> until a non-empty cell is found.
        /// </summary>
        /// <param name="columnIndex">Column to search.</param>
        /// <param name="start">Index whose predecessors are searched; the cell at this index is not inspected.</param>
        /// <returns>
        /// The index of the nearest preceding non-empty cell, or the first negative index reached
        /// (<c>-1</c> when the search runs off the beginning of the column).
        /// </returns>
        private int IndexOfPrevPresentValue(int columnIndex, int start)
        {
            int row;
            for (row = start - 1; row >= 0; row--)
            {
                if (!PreprocessingData.IsCellEmpty(columnIndex, row))
                {
                    break;
                }
            }

            return row;
        }
Ejemplo n.º 11
0
        /// <summary>
        /// Walks forwards from the cell just after <paramref name="start"/> until a non-empty cell is found.
        /// </summary>
        /// <param name="columnIndex">Column to search.</param>
        /// <param name="start">Index whose successors are searched; the cell at this index is not inspected.</param>
        /// <returns>
        /// The index of the nearest following non-empty cell, or the first index that is not below
        /// <c>PreprocessingData.Rows</c> when the search runs off the end of the column.
        /// </returns>
        private int IndexOfNextPresentValue(int columnIndex, int start)
        {
            int row;
            for (row = start + 1; row < PreprocessingData.Rows; row++)
            {
                if (!PreprocessingData.IsCellEmpty(columnIndex, row))
                {
                    break;
                }
            }

            return row;
        }
Ejemplo n.º 12
0
        /// <summary>
        /// Cloning constructor: builds this instance from <paramref name="original"/>.
        /// </summary>
        /// <param name="original">Instance whose state is copied.</param>
        /// <param name="cloner">Cloner passed on to the base constructor and used to clone both partitions.</param>
        protected PreprocessingData(PreprocessingData original, Cloner cloner)
            : base(original, cloner)
        {
            // Variable values go through CopyVariableValues; names are copied into a fresh list.
            variableValues    = CopyVariableValues(original.variableValues);
            variableNames     = new List <string>(original.variableNames);
            // Both partitions are cloned through the cloner rather than shared with the original.
            TrainingPartition = (IntRange)original.TrainingPartition.Clone(cloner);
            TestPartition     = (IntRange)original.TestPartition.Clone(cloner);
            // NOTE(review): the clone starts with an empty transformation list; the original's
            // transformations are not carried over - confirm this is intended.
            transformations   = new List <ITransformation>();

            // Runs last, after all state above has been assigned.
            RegisterEventHandler();
        }
Ejemplo n.º 13
0
 /// <summary>
 /// Writes the same textual value into every listed cell via <c>PreprocessingData.SetValue</c>.
 /// </summary>
 /// <remarks>The result returned by <c>SetValue</c> is ignored.</remarks>
 /// <param name="cells">Maps a column index to the indices of the cells to overwrite in that column.</param>
 /// <param name="value">Text handed to <c>SetValue</c> for each cell.</param>
 public void ReplaceIndicesByString(IDictionary<int, IList<int>> cells, string value)
 {
     PreprocessingData.InTransaction(() =>
     {
         foreach (KeyValuePair<int, IList<int>> entry in cells)
         {
             int columnIndex = entry.Key;
             foreach (int rowIndex in entry.Value)
             {
                 PreprocessingData.SetValue(value, columnIndex, rowIndex);
             }
         }
     });
 }
Ejemplo n.º 14
0
 /// <summary>
 /// Replaces the listed cells of each column by linear interpolation between neighbouring cells.
 /// </summary>
 /// <remarks>
 /// For every column, <c>GetStartAndEndingsForInterpolation</c> supplies (start, end) index pairs and
 /// each pair is handed to <c>Interpolate</c>. All work runs inside one
 /// <c>PreprocessingData.InTransaction</c> call.
 /// </remarks>
 /// <param name="cells">Maps a column index to the indices of the cells to replace in that column.</param>
 public void ReplaceIndicesByLinearInterpolationOfNeighbours(IDictionary<int, IList<int>> cells)
 {
     PreprocessingData.InTransaction(() =>
     {
         foreach (KeyValuePair<int, IList<int>> entry in cells)
         {
             IList<Tuple<int, int>> gaps = GetStartAndEndingsForInterpolation(entry);
             foreach (Tuple<int, int> gap in gaps)
             {
                 int startIndex = gap.Item1;
                 int endIndex = gap.Item2;
                 Interpolate(entry, startIndex, endIndex);
             }
         }
     });
 }
Ejemplo n.º 15
0
        /// <summary>
        /// Collects the indices of all columns whose share of missing values exceeds the given percentage.
        /// </summary>
        /// <remarks>
        /// The share is computed on a 0-100 scale as <c>100 * missing / rows</c> in double precision.
        /// With zero rows and zero missing values the share is NaN, which never exceeds the threshold.
        /// </remarks>
        /// <param name="percent">Exclusive lower bound for the missing-value percentage (0-100 scale).</param>
        /// <returns>The matching column indices, in ascending order.</returns>
        public List<int> ColumnsWithMissingValuesGreater(double percent)
        {
            List<int> columns = new List<int>();

            for (int i = 0; i < PreprocessingData.Columns; ++i)
            {
                int missingCount = PreprocessingData.GetMissingValueCount(i);

                // Multiply before dividing and stay in double precision: the former float
                // expression (100f / rows * missing) rounded twice in single precision and could
                // land on the wrong side of a double threshold at the boundary.
                double missingPercent = 100.0 * missingCount / PreprocessingData.Rows;
                if (missingPercent > percent)
                {
                    columns.Add(i);
                }
            }

            return columns;
        }
Ejemplo n.º 16
0
        /// <summary>
        /// Rearranges one column so that position <c>i</c> receives the value previously stored at
        /// position <c>indices[i]</c>.
        /// </summary>
        /// <typeparam name="T">Element type of the column.</typeparam>
        /// <param name="columnIndex">Column to rearrange.</param>
        /// <param name="indices">Source position for every target position; must have one entry per value.</param>
        /// <exception cref="InvalidOperationException">
        /// The number of indices differs from the number of values in the column.
        /// </exception>
        private void ReOrderToIndices<T>(int columnIndex, int[] indices)
        {
            // Reads come from a copy taken up front, so values overwritten earlier in the loop
            // cannot be picked up by later iterations.
            var snapshot = new List<T>(PreprocessingData.GetValues<T>(columnIndex));

            if (indices.Length != snapshot.Count)
            {
                throw new InvalidOperationException("The number of provided indices does not match the values.");
            }

            int target = 0;
            foreach (int source in indices)
            {
                PreprocessingData.SetCell<T>(columnIndex, target, snapshot[source]);
                target++;
            }
        }
Ejemplo n.º 17
0
        /// <summary>
        /// Replaces the listed cells with random values drawn between the column's minimum and maximum.
        /// </summary>
        /// <remarks>
        /// Only <c>double</c> and <c>DateTime</c> generators are supplied; no string generator is passed
        /// to <c>ReplaceIndicesByValues</c>. The generated sequences are lazy, so the random numbers are
        /// drawn while the sequences are consumed.
        /// </remarks>
        /// <param name="cells">Maps a column index to the indices of the cells to overwrite in that column.</param>
        /// <param name="considerSelection">Forwarded to <c>PreprocessingData.GetMin</c> and <c>GetMax</c>.</param>
        public void ReplaceIndicesByRandomValue(IDictionary<int, IList<int>> cells, bool considerSelection = false)
        {
            var rng = new FastRandom();

            // One random double per listed cell, scaled into [min, min + (max - min)).
            Func<int, IEnumerable<double>> randomDoubles = columnIndex =>
            {
                double lower = PreprocessingData.GetMin<double>(columnIndex, considerSelection);
                double upper = PreprocessingData.GetMax<double>(columnIndex, considerSelection);
                double width = upper - lower;
                return cells[columnIndex].Select(_ => rng.NextDouble() * width + lower);
            };

            // Same idea for dates: a random offset in seconds added to the minimum.
            Func<int, IEnumerable<DateTime>> randomDates = columnIndex =>
            {
                var earliest = PreprocessingData.GetMin<DateTime>(columnIndex, considerSelection);
                var latest = PreprocessingData.GetMax<DateTime>(columnIndex, considerSelection);
                double widthInSeconds = (latest - earliest).TotalSeconds;
                return cells[columnIndex].Select(_ => earliest + TimeSpan.FromSeconds(rng.NextDouble() * widthInSeconds));
            };

            ReplaceIndicesByValues(cells, randomDoubles, randomDates);
        }
        /// <summary>
        /// Builds a scatter plot of two <c>double</c> columns, optionally split into one data row per
        /// distinct value of a third (color) column.
        /// </summary>
        /// <remarks>
        /// Without a color variable a single row containing all points is produced. With a color
        /// variable, one row is created per distinct color value and the rows take their colors from
        /// <c>ColorGradient.Colors</c>, stepping by <c>gradients.Count / numColors</c> (integer division)
        /// between rows.
        /// </remarks>
        /// <param name="variableNameX">Name of the column plotted on the x axis.</param>
        /// <param name="variableNameY">Name of the column plotted on the y axis.</param>
        /// <param name="variableNameColor">
        /// Name of the column used to group points; <c>null</c> or <c>"-"</c> disables grouping.
        /// </param>
        /// <returns>The populated scatter plot.</returns>
        public ScatterPlot CreateScatterPlot(string variableNameX, string variableNameY, string variableNameColor = "-")
        {
            ScatterPlot scatterPlot = new ScatterPlot();

            IList<double> xValues = PreprocessingData.GetValues<double>(PreprocessingData.GetColumnIndex(variableNameX));
            IList<double> yValues = PreprocessingData.GetValues<double>(PreprocessingData.GetColumnIndex(variableNameY));

            if (variableNameColor == null || variableNameColor == "-")
            {
                List<Point2D<double>> points = new List<Point2D<double>>();

                for (int i = 0; i < xValues.Count; i++)
                {
                    Point2D<double> point = new Point2D<double>(xValues[i], yValues[i]);
                    points.Add(point);
                }

                ScatterPlotDataRow scdr = new ScatterPlotDataRow(variableNameX + " - " + variableNameY, "", points);
                scatterPlot.Rows.Add(scdr);
            }
            else
            {
                var colorValues = PreprocessingData.GetValues<double>(PreprocessingData.GetColumnIndex(variableNameColor));
                var data        = xValues.Zip(yValues, (x, y) => new { x, y }).Zip(colorValues, (v, c) => new { v.x, v.y, c }).ToList();
                var gradients   = ColorGradient.Colors;

                // Enumerate the distinct color values once instead of once for the count and once for the loop.
                var distinctColorValues = colorValues.Distinct().ToList();
                int numColors   = distinctColorValues.Count;
                int curGradient = 0;

                foreach (var colorValue in distinctColorValues)
                {
                    // Equals (not ==) so that NaN matches NaN: Distinct() yields NaN as a group of its
                    // own, and with == that group's row would always stay empty.
                    var values = data.Where(x => x.c.Equals(colorValue));
                    var row    = new ScatterPlotDataRow(
                        variableNameX + " - " + variableNameY + " (" + colorValue + ")",
                        "",
                        values.Select(v => new Point2D<double>(v.x, v.y)),
                        new ScatterPlotDataRowVisualProperties()
                    {
                        Color = gradients[curGradient]
                    });
                    curGradient += gradients.Count / numColors;
                    scatterPlot.Rows.Add(row);
                }
            }

            return scatterPlot;
        }
Ejemplo n.º 19
0
        /// <summary>
        /// Lists the names of all <c>double</c> columns whose number of distinct values does not exceed
        /// <c>MAX_DISTINCT_VALUES_FOR_CLASSIFCATION</c>.
        /// </summary>
        /// <returns>The qualifying variable names, in column order.</returns>
        public IEnumerable<string> GetVariableNamesForHistogramClassification()
        {
            var names = new List<string>();

            for (int col = 0; col < PreprocessingData.Columns; col++)
            {
                // Columns of any type other than double never qualify.
                if (!PreprocessingData.VariableHasType<double>(col))
                {
                    continue;
                }

                double distinctCount = PreprocessingData.GetValues<double>(col).GroupBy(v => v).Count();
                if (distinctCount <= MAX_DISTINCT_VALUES_FOR_CLASSIFCATION)
                {
                    names.Add(PreprocessingData.GetVariableName(col));
                }
            }

            return names;
        }
Ejemplo n.º 20
0
        /// <summary>
        /// Builds a scatter plot with a single data row that pairs the values of two <c>double</c> columns.
        /// </summary>
        /// <remarks>
        /// One point is created per x value; the y value at the same position is used for it.
        /// </remarks>
        /// <param name="variableNameX">Name of the column plotted on the x axis.</param>
        /// <param name="variableNameY">Name of the column plotted on the y axis.</param>
        /// <returns>The populated scatter plot.</returns>
        public ScatterPlot CreateScatterPlot(string variableNameX, string variableNameY)
        {
            var plot = new ScatterPlot();

            IList<double> xs = PreprocessingData.GetValues<double>(PreprocessingData.GetColumnIndex(variableNameX));
            IList<double> ys = PreprocessingData.GetValues<double>(PreprocessingData.GetColumnIndex(variableNameY));

            var points = new List<Point2D<double>>();
            int position = 0;
            while (position < xs.Count)
            {
                points.Add(new Point2D<double>(xs[position], ys[position]));
                position++;
            }

            // The row is named "<x> - <y>" and carries all points.
            plot.Rows.Add(new ScatterPlotDataRow(variableNameX + " - " + variableNameY, "", points));
            return plot;
        }
Ejemplo n.º 21
0
 /// <summary>
 /// Applies the same row reordering to every <c>double</c>, <c>string</c> and <c>DateTime</c> column.
 /// </summary>
 /// <remarks>
 /// Columns of any other type are skipped. All columns are processed inside one
 /// <c>PreprocessingData.InTransaction</c> call.
 /// </remarks>
 /// <param name="indices">Index array handed to the typed <c>ReOrderToIndices</c> for each column.</param>
 public void ReOrderToIndices(int[] indices)
 {
     PreprocessingData.InTransaction(() =>
     {
         for (int col = 0; col < PreprocessingData.Columns; col++)
         {
             // Dispatch on the column's element type; the first matching type wins.
             if (PreprocessingData.VariableHasType<double>(col))
             {
                 ReOrderToIndices<double>(col, indices);
                 continue;
             }

             if (PreprocessingData.VariableHasType<string>(col))
             {
                 ReOrderToIndices<string>(col, indices);
                 continue;
             }

             if (PreprocessingData.VariableHasType<DateTime>(col))
             {
                 ReOrderToIndices<DateTime>(col, indices);
             }
         }
     });
 }
Ejemplo n.º 22
0
 /// <summary>
 /// Returns the textual content of a cell as reported by <c>PreprocessingData.GetCellAsString</c>.
 /// </summary>
 /// <param name="rowIndex">Row of the cell.</param>
 /// <param name="columnIndex">Column of the cell.</param>
 /// <returns>The string returned by <c>GetCellAsString</c> for that cell.</returns>
 public string GetValue(int rowIndex, int columnIndex)
 {
     // Argument order flips here: this method takes (row, column), GetCellAsString takes (column, row).
     return(PreprocessingData.GetCellAsString(columnIndex, rowIndex));
 }
Ejemplo n.º 23
0
 /// <summary>
 /// Persists the current filter via <c>PreprocessingData.PersistFilter</c> and then calls <c>Reset</c>.
 /// </summary>
 /// <param name="filters">Not read by this method.</param>
 /// <param name="isAndCombination">Not read by this method.</param>
 public void Apply(IList<IFilter> filters, bool isAndCombination) {
   // NOTE(review): both parameters are unused; the filter persisted is whatever is currently set
   // on PreprocessingData - presumably installed by an earlier preview call; confirm.
   PreprocessingData.PersistFilter();
   Reset();
 }
Ejemplo n.º 24
0
 /// <summary>
 /// Deletes a single column by forwarding to <c>PreprocessingData.DeleteColumn</c>.
 /// </summary>
 /// <param name="column">Index of the column to delete.</param>
 public void DeleteColumn(int column)
 {
     PreprocessingData.DeleteColumn(column);
 }
Ejemplo n.º 25
0
 /// <summary>
 /// Clears the current filter by forwarding to <c>PreprocessingData.ResetFilter</c>.
 /// </summary>
 public void Reset() {
   PreprocessingData.ResetFilter();
 }
Ejemplo n.º 26
0
 /// <summary>
 /// Checks a textual value against a column by forwarding to <c>PreprocessingData.Validate</c>.
 /// </summary>
 /// <param name="value">Text to validate.</param>
 /// <param name="errorMessage">Receives the message produced by <c>PreprocessingData.Validate</c>.</param>
 /// <param name="columnIndex">Column the value is validated for.</param>
 /// <returns>The result of <c>PreprocessingData.Validate</c>.</returns>
 public bool Validate(string value, out string errorMessage, int columnIndex)
 {
     return(PreprocessingData.Validate(value, out errorMessage, columnIndex));
 }
Ejemplo n.º 27
0
 /// <summary>
 /// Deletes the given rows by forwarding to <c>PreprocessingData.DeleteRowsWithIndices</c>.
 /// </summary>
 /// <param name="rows">Indices of the rows to delete.</param>
 public void DeleteRows(IEnumerable <int> rows)
 {
     PreprocessingData.DeleteRowsWithIndices(rows);
 }
Ejemplo n.º 28
0
 /// <summary>
 /// Writes a textual value into a cell by forwarding to <c>PreprocessingData.SetValue</c>.
 /// </summary>
 /// <param name="value">Text to store.</param>
 /// <param name="rowIndex">Row of the cell.</param>
 /// <param name="columnIndex">Column of the cell.</param>
 /// <returns>The result of <c>PreprocessingData.SetValue</c>.</returns>
 public bool SetValue(string value, int rowIndex, int columnIndex)
 {
     // Argument order flips here: this method takes (row, column), SetValue takes (column, row).
     return(PreprocessingData.SetValue(value, columnIndex, rowIndex));
 }