/// <summary>
/// Collects the indices of all double and DateTime columns whose variance is
/// strictly smaller than the given threshold.
/// </summary>
/// <param name="variance">Exclusive upper bound for a column's variance.</param>
/// <returns>
/// The matching column indices in ascending order. Columns that are neither
/// double nor DateTime are never reported.
/// </returns>
public List<int> ColumnsWithVarianceSmaller(double variance) {
  var matches = new List<int>();
  for (int col = 0; col < PreprocessingData.Columns; ++col) {
    double columnVariance;
    if (PreprocessingData.VariableHasType<double>(col)) {
      columnVariance = PreprocessingData.GetVariance<double>(col);
    } else if (PreprocessingData.VariableHasType<DateTime>(col)) {
      // the DateTime variance exposes Ticks; scale them to seconds before comparing
      columnVariance = (double)PreprocessingData.GetVariance<DateTime>(col).Ticks / TimeSpan.TicksPerSecond;
    } else {
      // no variance is computed for other column types
      continue;
    }

    if (columnVariance < variance) {
      matches.Add(col);
    }
  }
  return matches;
}
#pragma warning restore 0067
#endregion

#region Manipulations
/// <summary>
/// Overwrites the listed cells of every column with one value that is computed
/// once per column by the aggregator matching the column's type.
/// </summary>
/// <param name="cells">Maps a column index to the row indices that are overwritten in that column.</param>
/// <param name="doubleAggregator">Produces the replacement for a double column; double columns are skipped when null.</param>
/// <param name="dateTimeAggregator">Produces the replacement for a DateTime column; DateTime columns are skipped when null.</param>
/// <param name="stringAggregator">Produces the replacement for a string column; string columns are skipped when null.</param>
private void ReplaceIndicesByValue(IDictionary<int, IList<int>> cells, Func<int, double> doubleAggregator = null, Func<int, DateTime> dateTimeAggregator = null, Func<int, string> stringAggregator = null) {
  PreprocessingData.InTransaction(() => {
    foreach (var entry in cells) {
      int columnIndex = entry.Key;
      IList<int> rowIndices = entry.Value;

      if (doubleAggregator != null && PreprocessingData.VariableHasType<double>(columnIndex)) {
        double replacement = doubleAggregator(columnIndex);
        foreach (int row in rowIndices) {
          PreprocessingData.SetCell<double>(columnIndex, row, replacement);
        }
        continue;
      }

      if (dateTimeAggregator != null && PreprocessingData.VariableHasType<DateTime>(columnIndex)) {
        DateTime replacement = dateTimeAggregator(columnIndex);
        foreach (int row in rowIndices) {
          PreprocessingData.SetCell<DateTime>(columnIndex, row, replacement);
        }
        continue;
      }

      if (stringAggregator != null && PreprocessingData.VariableHasType<string>(columnIndex)) {
        string replacement = stringAggregator(columnIndex);
        foreach (int row in rowIndices) {
          PreprocessingData.SetCell<string>(columnIndex, row, replacement);
        }
      }
    }
  });
}
/// <summary>
/// Randomly reorders the rows of the data, either across all rows or
/// independently inside the test and the training partition.
/// </summary>
/// <param name="shuffleRangesSeparately">
/// When true, rows are only permuted within the test partition and within the
/// training partition; when false, all rows are permuted together.
/// </param>
public void Shuffle(bool shuffleRangesSeparately) {
  var random = new FastRandom();

  if (!shuffleRangesSeparately) {
    PreprocessingData.InTransaction(() => {
      var permutation = Enumerable.Range(0, PreprocessingData.Rows).ToArray();
      permutation.ShuffleInPlace(random);
      ReOrderToIndices(permutation);
    });
    return;
  }

  var partitions = new[] { PreprocessingData.TestPartition, PreprocessingData.TrainingPartition };
  PreprocessingData.InTransaction(() => {
    // process all given ranges - e.g. TrainingPartition, TestPartition
    foreach (IntRange partition in partitions) {
      // start from the identity order and only overwrite the slots of this partition
      var permutation = Enumerable.Range(0, PreprocessingData.Rows).ToArray();
      var shuffledPartition = Enumerable.Range(partition.Start, partition.Size).Shuffle(random).ToArray();
      for (int offset = 0; partition.Start + offset < partition.End; offset++) {
        permutation[partition.Start + offset] = shuffledPartition[offset];
      }
      ReOrderToIndices(permutation);
    }
  });
}
/// <summary>
/// Overwrites the listed cells of every column with individual values. The
/// aggregator matching the column's type supplies a sequence of replacements
/// that is paired position by position with the column's row indices.
/// </summary>
/// <param name="cells">Maps a column index to the row indices that are overwritten in that column.</param>
/// <param name="doubleAggregator">Supplies the replacements for a double column; double columns are skipped when null.</param>
/// <param name="dateTimeAggregator">Supplies the replacements for a DateTime column; DateTime columns are skipped when null.</param>
/// <param name="stringAggregator">Supplies the replacements for a string column; string columns are skipped when null.</param>
/// <remarks>
/// Rows and replacements are combined with Zip, so pairing stops as soon as
/// either sequence is exhausted.
/// </remarks>
private void ReplaceIndicesByValues(IDictionary<int, IList<int>> cells, Func<int, IEnumerable<double>> doubleAggregator = null, Func<int, IEnumerable<DateTime>> dateTimeAggregator = null, Func<int, IEnumerable<string>> stringAggregator = null) {
  PreprocessingData.InTransaction(() => {
    foreach (var entry in cells) {
      int columnIndex = entry.Key;
      IList<int> rowIndices = entry.Value;

      if (doubleAggregator != null && PreprocessingData.VariableHasType<double>(columnIndex)) {
        IEnumerable<double> replacements = doubleAggregator(columnIndex);
        foreach (var assignment in rowIndices.Zip(replacements, (r, v) => new KeyValuePair<int, double>(r, v))) {
          PreprocessingData.SetCell<double>(columnIndex, assignment.Key, assignment.Value);
        }
        continue;
      }

      if (dateTimeAggregator != null && PreprocessingData.VariableHasType<DateTime>(columnIndex)) {
        IEnumerable<DateTime> replacements = dateTimeAggregator(columnIndex);
        foreach (var assignment in rowIndices.Zip(replacements, (r, v) => new KeyValuePair<int, DateTime>(r, v))) {
          PreprocessingData.SetCell<DateTime>(columnIndex, assignment.Key, assignment.Value);
        }
        continue;
      }

      if (stringAggregator != null && PreprocessingData.VariableHasType<string>(columnIndex)) {
        IEnumerable<string> replacements = stringAggregator(columnIndex);
        foreach (var assignment in rowIndices.Zip(replacements, (r, v) => new KeyValuePair<int, string>(r, v))) {
          PreprocessingData.SetCell<string>(columnIndex, assignment.Key, assignment.Value);
        }
      }
    }
  });
}
/// <summary>
/// Replaces the listed cells of every column with the value returned by
/// PreprocessingData.GetMode for that column.
/// </summary>
/// <param name="cells">Maps a column index to the row indices that are overwritten in that column.</param>
/// <param name="considerSelection">Forwarded unchanged to PreprocessingData.GetMode.</param>
public void ReplaceIndicesByMode(IDictionary<int, IList<int>> cells, bool considerSelection = false) {
  Func<int, double> doubleMode = col => PreprocessingData.GetMode<double>(col, considerSelection);
  Func<int, DateTime> dateTimeMode = col => PreprocessingData.GetMode<DateTime>(col, considerSelection);
  Func<int, string> stringMode = col => PreprocessingData.GetMode<string>(col, considerSelection);

  ReplaceIndicesByValue(cells, doubleMode, dateTimeMode, stringMode);
}
/// <summary>
/// Fills the rows from <paramref name="prevIndex"/> (inclusive) up to
/// <paramref name="nextIndex"/> (exclusive) of a double or DateTime column with
/// values lying on the straight line between the cells at the two indices.
/// </summary>
/// <param name="column">The column to modify; only its key (the column index) is used here.</param>
/// <param name="prevIndex">Row holding the start value of the line.</param>
/// <param name="nextIndex">Row holding the end value of the line; this row itself is not written.</param>
/// <remarks>
/// The row at <paramref name="prevIndex"/> is rewritten with its own value
/// (offset zero). Columns of any other type are left untouched.
/// </remarks>
private void Interpolate(KeyValuePair<int, IList<int>> column, int prevIndex, int nextIndex) {
  int columnIndex = column.Key;
  int span = nextIndex - prevIndex;

  if (PreprocessingData.VariableHasType<double>(columnIndex)) {
    double first = PreprocessingData.GetCell<double>(columnIndex, prevIndex);
    double last = PreprocessingData.GetCell<double>(columnIndex, nextIndex);
    double step = (last - first) / span;
    for (int offset = 0; prevIndex + offset < nextIndex; ++offset) {
      double value = first + (step * offset);
      PreprocessingData.SetCell<double>(columnIndex, prevIndex + offset, value);
    }
    return;
  }

  if (PreprocessingData.VariableHasType<DateTime>(columnIndex)) {
    DateTime first = PreprocessingData.GetCell<DateTime>(columnIndex, prevIndex);
    DateTime last = PreprocessingData.GetCell<DateTime>(columnIndex, nextIndex);
    // the step between two neighbouring rows is expressed in seconds
    double stepSeconds = (last - first).TotalSeconds / span;
    for (int offset = 0; prevIndex + offset < nextIndex; ++offset) {
      DateTime value = first.AddSeconds(stepSeconds * offset);
      PreprocessingData.SetCell<DateTime>(columnIndex, prevIndex + offset, value);
    }
  }
}
/// <summary>
/// Deletes the given columns inside a single transaction.
/// </summary>
/// <param name="columns">Indices of the columns to delete, in any order.</param>
private void DeleteColumns(List<int> columns) {
  PreprocessingData.InTransaction(() => {
    // delete from the highest index downwards so the remaining indices stay valid
    var descending = new List<int>(columns);
    descending.Sort();
    descending.Reverse();
    foreach (int columnIndex in descending) {
      PreprocessingData.DeleteColumn(columnIndex);
    }
  });
}
/// <summary>
/// Deletes the given rows inside a single transaction.
/// </summary>
/// <param name="rows">Indices of the rows to delete, in any order.</param>
private void DeleteRows(List<int> rows) {
  PreprocessingData.InTransaction(() => {
    // delete from the highest index downwards so the remaining indices stay valid
    var descending = new List<int>(rows);
    descending.Sort((a, b) => b.CompareTo(a));
    foreach (int rowIndex in descending) {
      PreprocessingData.DeleteRow(rowIndex);
    }
  });
}
/// <summary>
/// Evaluates the usable filters and installs the outcome as the current
/// filter of the preprocessing data.
/// </summary>
/// <param name="filters">Candidate filters; only those that are active and have constraint data are used.</param>
/// <param name="isAndCombination">Forwarded unchanged to GetActiveFilterResult.</param>
/// <returns>
/// The result of GetActiveFilterResult when at least one filter is usable.
/// Otherwise the array produced by CreateBoolArray(PreprocessingData.Rows, false);
/// in that case PreprocessingData.SetFilter is not called.
/// </returns>
public bool[] Preview(IList<IFilter> filters, bool isAndCombination) {
  IList<IFilter> activeFilters =
    (from candidate in filters
     where candidate.Active && candidate.ConstraintData != null
     select candidate).ToList<IFilter>();

  if (activeFilters.Count <= 0) {
    return CreateBoolArray(PreprocessingData.Rows, false);
  }

  var result = GetActiveFilterResult(activeFilters, isAndCombination);
  PreprocessingData.SetFilter(result);
  return result;
}
/// <summary>
/// Searches upwards from the row before <paramref name="start"/> for the
/// nearest cell of the column that is not empty.
/// </summary>
/// <param name="columnIndex">Column to search in.</param>
/// <param name="start">Row whose predecessors are inspected; the row itself is not checked.</param>
/// <returns>
/// The index of the nearest preceding non-empty cell. When every inspected
/// cell is empty the result is -1; when <c>start - 1</c> is already negative
/// it is returned unchanged.
/// </returns>
private int IndexOfPrevPresentValue(int columnIndex, int start) {
  int candidate = start - 1;
  for (; candidate >= 0; candidate--) {
    if (!PreprocessingData.IsCellEmpty(columnIndex, candidate)) {
      break;
    }
  }
  return candidate;
}
/// <summary>
/// Searches downwards from the row after <paramref name="start"/> for the
/// nearest cell of the column that is not empty.
/// </summary>
/// <param name="columnIndex">Column to search in.</param>
/// <param name="start">Row whose successors are inspected; the row itself is not checked.</param>
/// <returns>
/// The index of the nearest following non-empty cell, or the first inspected
/// index that is not smaller than PreprocessingData.Rows when no such cell exists.
/// </returns>
private int IndexOfNextPresentValue(int columnIndex, int start) {
  int candidate = start + 1;
  for (; candidate < PreprocessingData.Rows; candidate++) {
    if (!PreprocessingData.IsCellEmpty(columnIndex, candidate)) {
      break;
    }
  }
  return candidate;
}
/// <summary>
/// Cloning constructor: builds this instance from <paramref name="original"/>.
/// </summary>
/// <param name="original">The instance whose state is copied.</param>
/// <param name="cloner">Passed to the base constructor and used to clone both partitions.</param>
/// <remarks>
/// The variable values are copied via CopyVariableValues and the variable names
/// into a new list. The transformation list starts out empty; it is not taken
/// over from <paramref name="original"/>.
/// </remarks>
protected PreprocessingData(PreprocessingData original, Cloner cloner)
  : base(original, cloner) {
  this.variableValues = CopyVariableValues(original.variableValues);
  this.variableNames = new List<string>(original.variableNames);

  this.TrainingPartition = (IntRange)original.TrainingPartition.Clone(cloner);
  this.TestPartition = (IntRange)original.TestPartition.Clone(cloner);

  this.transformations = new List<ITransformation>();

  // runs last, after all state above has been assigned
  RegisterEventHandler();
}
/// <summary>
/// Writes the same textual value into every listed cell, using
/// PreprocessingData.SetValue, inside a single transaction.
/// </summary>
/// <param name="cells">Maps a column index to the row indices that are overwritten in that column.</param>
/// <param name="value">The text handed to PreprocessingData.SetValue for each cell.</param>
public void ReplaceIndicesByString(IDictionary<int, IList<int>> cells, string value) {
  PreprocessingData.InTransaction(() => {
    foreach (var entry in cells) {
      int columnIndex = entry.Key;
      IList<int> rowIndices = entry.Value;
      foreach (int row in rowIndices) {
        PreprocessingData.SetValue(value, columnIndex, row);
      }
    }
  });
}
/// <summary>
/// Interpolates every column entry of <paramref name="cells"/> inside a single
/// transaction: the index pairs delivered by GetStartAndEndingsForInterpolation
/// are passed one by one to Interpolate.
/// </summary>
/// <param name="cells">Maps a column index to the row indices selected in that column.</param>
public void ReplaceIndicesByLinearInterpolationOfNeighbours(IDictionary<int, IList<int>> cells) {
  PreprocessingData.InTransaction(() => {
    foreach (var entry in cells) {
      IList<Tuple<int, int>> segments = GetStartAndEndingsForInterpolation(entry);
      foreach (Tuple<int, int> segment in segments) {
        int from = segment.Item1;
        int to = segment.Item2;
        Interpolate(entry, from, to);
      }
    }
  });
}
/// <summary>
/// Collects the indices of all columns whose share of missing values exceeds
/// the given percentage.
/// </summary>
/// <param name="percent">Exclusive lower bound for the share of missing values, on a 0 to 100 scale.</param>
/// <returns>The matching column indices in ascending order.</returns>
public List<int> ColumnsWithMissingValuesGreater(double percent) {
  var matches = new List<int>();
  for (int col = 0; col < PreprocessingData.Columns; ++col) {
    int missing = PreprocessingData.GetMissingValueCount(col);
    // the share is computed in single precision (100f) before it is compared with the double threshold
    bool exceedsThreshold = 100f / PreprocessingData.Rows * missing > percent;
    if (exceedsThreshold) {
      matches.Add(col);
    }
  }
  return matches;
}
/// <summary>
/// Rearranges the values of one column so that row <c>i</c> afterwards holds
/// the value that was previously stored at row <c>indices[i]</c>.
/// </summary>
/// <typeparam name="T">Value type of the column.</typeparam>
/// <param name="columnIndex">Column to rearrange.</param>
/// <param name="indices">For every target row, the source row its new value is taken from.</param>
/// <exception cref="InvalidOperationException">
/// The number of indices differs from the number of values in the column.
/// </exception>
private void ReOrderToIndices<T>(int columnIndex, int[] indices) {
  // work on a copy so that cells overwritten early do not affect later reads
  var snapshot = new List<T>(PreprocessingData.GetValues<T>(columnIndex));

  if (indices.Length != snapshot.Count) {
    throw new InvalidOperationException("The number of provided indices does not match the values.");
  }

  int targetRow = 0;
  foreach (int sourceRow in indices) {
    T moved = snapshot[sourceRow];
    PreprocessingData.SetCell<T>(columnIndex, targetRow, moved);
    targetRow++;
  }
}
/// <summary>
/// Replaces the listed cells of double and DateTime columns with random values
/// drawn between the column's minimum and maximum.
/// </summary>
/// <param name="cells">Maps a column index to the row indices that are overwritten in that column.</param>
/// <param name="considerSelection">Forwarded unchanged to PreprocessingData.GetMin and GetMax.</param>
/// <remarks>
/// No aggregator is supplied for string columns. One random value is generated
/// per listed row; the sequences are lazy, so the values are drawn while
/// ReplaceIndicesByValues consumes them.
/// </remarks>
public void ReplaceIndicesByRandomValue(IDictionary<int, IList<int>> cells, bool considerSelection = false) {
  var rand = new FastRandom();

  Func<int, IEnumerable<double>> randomDoubles = col => {
    double lower = PreprocessingData.GetMin<double>(col, considerSelection);
    double upper = PreprocessingData.GetMax<double>(col, considerSelection);
    double width = upper - lower;
    return cells[col].Select(ignored => rand.NextDouble() * width + lower);
  };

  Func<int, IEnumerable<DateTime>> randomDates = col => {
    var earliest = PreprocessingData.GetMin<DateTime>(col, considerSelection);
    var latest = PreprocessingData.GetMax<DateTime>(col, considerSelection);
    // the width of the interval is measured in seconds
    double widthSeconds = (latest - earliest).TotalSeconds;
    return cells[col].Select(ignored => earliest + TimeSpan.FromSeconds(rand.NextDouble() * widthSeconds));
  };

  ReplaceIndicesByValues(cells, randomDoubles, randomDates);
}
/// <summary>
/// Builds a scatter plot of two double columns, optionally split into one data
/// row per distinct value of a third double column.
/// </summary>
/// <param name="variableNameX">Name of the column providing the x coordinates.</param>
/// <param name="variableNameY">Name of the column providing the y coordinates.</param>
/// <param name="variableNameColor">
/// Name of the column whose distinct values define the data rows. With
/// <c>null</c> or <c>"-"</c> a single data row containing all points is created.
/// </param>
/// <returns>The populated scatter plot.</returns>
/// <remarks>
/// Points are grouped by their colour value in one pass, in order of first
/// appearance. Grouping uses the default equality of double, so points whose
/// colour value is NaN end up in their own data row instead of being lost.
/// The colours are picked at evenly spaced positions of ColorGradient.Colors,
/// which also yields different colours when there are more groups than
/// gradient entries divided evenly would allow.
/// </remarks>
public ScatterPlot CreateScatterPlot(string variableNameX, string variableNameY, string variableNameColor = "-") {
  ScatterPlot scatterPlot = new ScatterPlot();
  IList<double> xValues = PreprocessingData.GetValues<double>(PreprocessingData.GetColumnIndex(variableNameX));
  IList<double> yValues = PreprocessingData.GetValues<double>(PreprocessingData.GetColumnIndex(variableNameY));

  if (variableNameColor == null || variableNameColor == "-") {
    List<Point2D<double>> points = new List<Point2D<double>>();
    for (int i = 0; i < xValues.Count; i++) {
      Point2D<double> point = new Point2D<double>(xValues[i], yValues[i]);
      points.Add(point);
    }
    ScatterPlotDataRow scdr = new ScatterPlotDataRow(variableNameX + " - " + variableNameY, "", points);
    scatterPlot.Rows.Add(scdr);
    return scatterPlot;
  }

  var colorValues = PreprocessingData.GetValues<double>(PreprocessingData.GetColumnIndex(variableNameColor));
  var data = xValues.Zip(yValues, (x, y) => new { x, y }).Zip(colorValues, (v, c) => new { v.x, v.y, c }).ToList();

  // one pass over the data; comparing with == per colour would never match NaN
  var groups = data.GroupBy(d => d.c).ToList();
  var gradients = ColorGradient.Colors;
  int numColors = groups.Count;

  for (int g = 0; g < numColors; g++) {
    var group = groups[g];
    // spread the groups evenly over the gradient; the index is always below gradients.Count
    int gradientIndex = (int)((long)g * gradients.Count / numColors);
    var row = new ScatterPlotDataRow(
      variableNameX + " - " + variableNameY + " (" + group.Key + ")",
      "",
      group.Select(v => new Point2D<double>(v.x, v.y)),
      new ScatterPlotDataRowVisualProperties() { Color = gradients[gradientIndex] });
    scatterPlot.Rows.Add(row);
  }

  return scatterPlot;
}
/// <summary>
/// Lists the names of all double columns that have at most
/// MAX_DISTINCT_VALUES_FOR_CLASSIFCATION distinct values.
/// </summary>
/// <returns>The variable names of the qualifying columns, in column order.</returns>
public IEnumerable<string> GetVariableNamesForHistogramClassification() {
  var names = new List<string>();

  for (int col = 0; col < PreprocessingData.Columns; ++col) {
    // only return variable names from type double
    if (!PreprocessingData.VariableHasType<double>(col)) {
      continue;
    }

    double distinctValueCount = PreprocessingData.GetValues<double>(col).GroupBy(v => v).Count();
    if (distinctValueCount <= MAX_DISTINCT_VALUES_FOR_CLASSIFCATION) {
      names.Add(PreprocessingData.GetVariableName(col));
    }
  }

  return names;
}
/// <summary>
/// Builds a scatter plot with a single data row that pairs the values of two
/// double columns row by row.
/// </summary>
/// <param name="variableNameX">Name of the column providing the x coordinates.</param>
/// <param name="variableNameY">Name of the column providing the y coordinates.</param>
/// <returns>The scatter plot containing one data row named "X - Y".</returns>
public ScatterPlot CreateScatterPlot(string variableNameX, string variableNameY) {
  int xColumn = PreprocessingData.GetColumnIndex(variableNameX);
  IList<double> xValues = PreprocessingData.GetValues<double>(xColumn);
  int yColumn = PreprocessingData.GetColumnIndex(variableNameY);
  IList<double> yValues = PreprocessingData.GetValues<double>(yColumn);

  // the number of points is dictated by the x column
  List<Point2D<double>> points = Enumerable.Range(0, xValues.Count)
    .Select(row => new Point2D<double>(xValues[row], yValues[row]))
    .ToList();

  var plot = new ScatterPlot();
  plot.Rows.Add(new ScatterPlotDataRow(variableNameX + " - " + variableNameY, "", points));
  return plot;
}
/// <summary>
/// Applies the same row order to every double, string and DateTime column
/// inside a single transaction.
/// </summary>
/// <param name="indices">For every target row, the source row its new value is taken from.</param>
public void ReOrderToIndices(int[] indices) {
  PreprocessingData.InTransaction(() => {
    for (int col = 0; col < PreprocessingData.Columns; ++col) {
      // dispatch to the typed overload that matches the column
      if (PreprocessingData.VariableHasType<double>(col)) {
        ReOrderToIndices<double>(col, indices);
        continue;
      }
      if (PreprocessingData.VariableHasType<string>(col)) {
        ReOrderToIndices<string>(col, indices);
        continue;
      }
      if (PreprocessingData.VariableHasType<DateTime>(col)) {
        ReOrderToIndices<DateTime>(col, indices);
      }
    }
  });
}
/// <summary>
/// Returns the textual representation of a cell.
/// </summary>
/// <param name="rowIndex">Row of the cell.</param>
/// <param name="columnIndex">Column of the cell.</param>
/// <returns>The result of PreprocessingData.GetCellAsString for the cell.</returns>
public string GetValue(int rowIndex, int columnIndex) {
  // the data object expects the column first, then the row
  string text = PreprocessingData.GetCellAsString(columnIndex, rowIndex);
  return text;
}
/// <summary>
/// Persists the filter currently held by the preprocessing data and then
/// resets it.
/// </summary>
/// <param name="filters">Not used by this method.</param>
/// <param name="isAndCombination">Not used by this method.</param>
public void Apply(IList<IFilter> filters, bool isAndCombination) {
  // persist first, then clear via Reset
  PreprocessingData.PersistFilter();
  this.Reset();
}
/// <summary>
/// Deletes a single column by delegating to PreprocessingData.DeleteColumn.
/// </summary>
/// <param name="column">Index of the column to delete.</param>
public void DeleteColumn(int column) {
  int columnIndex = column;
  PreprocessingData.DeleteColumn(columnIndex);
}
/// <summary>
/// Resets the filter by delegating to PreprocessingData.ResetFilter.
/// </summary>
public void Reset() {
  var data = PreprocessingData;
  data.ResetFilter();
}
/// <summary>
/// Checks a textual value for a column by delegating to
/// PreprocessingData.Validate.
/// </summary>
/// <param name="value">The text to validate.</param>
/// <param name="errorMessage">Receives the message produced by PreprocessingData.Validate.</param>
/// <param name="columnIndex">Column the value is validated for.</param>
/// <returns>The result of PreprocessingData.Validate.</returns>
public bool Validate(string value, out string errorMessage, int columnIndex) {
  bool isValid = PreprocessingData.Validate(value, out errorMessage, columnIndex);
  return isValid;
}
/// <summary>
/// Deletes several rows by delegating to PreprocessingData.DeleteRowsWithIndices.
/// </summary>
/// <param name="rows">Indices of the rows to delete.</param>
public void DeleteRows(IEnumerable<int> rows) {
  IEnumerable<int> rowIndices = rows;
  PreprocessingData.DeleteRowsWithIndices(rowIndices);
}
/// <summary>
/// Writes a textual value into a cell by delegating to PreprocessingData.SetValue.
/// </summary>
/// <param name="value">The text to store.</param>
/// <param name="rowIndex">Row of the cell.</param>
/// <param name="columnIndex">Column of the cell.</param>
/// <returns>The result of PreprocessingData.SetValue.</returns>
public bool SetValue(string value, int rowIndex, int columnIndex) {
  // the data object expects the column first, then the row
  bool accepted = PreprocessingData.SetValue(value, columnIndex, rowIndex);
  return accepted;
}