/// <summary>
/// Creates a new column identified by <paramref name="columnId"/>.
/// </summary>
/// <param name="columnId">Identifier stored in <c>ColumnId</c>.</param>
/// <param name="index">Optional column index stored in the private <c>_index</c> field.</param>
/// <param name="initialSize">
/// Optional initial capacity of the <c>Fields</c> list; when null, the list is created with the default capacity.
/// </param>
public ReactiveColumn(string columnId, IColumnIndex<T> index = null, int? initialSize = null)
{
    _index = index;
    ColumnId = columnId;

    // Pre-size the backing list only when the caller supplied a capacity.
    if (initialSize.HasValue)
    {
        Fields = new List<T>(initialSize.Value);
    }
    else
    {
        Fields = new List<T>();
    }
}
/// <summary>
/// Tells whether <paramref name="ancestor"/> is the direct <c>Parent</c> of this index,
/// or is reported as an ancestor by that parent (the question is delegated up the parent chain).
/// </summary>
/// <param name="ancestor">Candidate ancestor index.</param>
/// <returns>
/// True when <c>Parent</c> equals <paramref name="ancestor"/> (this includes the case where both are null),
/// or when a non-null <c>Parent</c> itself owns <paramref name="ancestor"/> as an ancestor; false otherwise.
/// </returns>
public bool OwnsAsAncestor(IColumnIndex ancestor)
{
    // Direct match first; otherwise let the parent (if any) answer for the rest of the chain.
    return Parent == ancestor
        || (Parent != null && Parent.OwnsAsAncestor(ancestor));
}
/// <summary>
/// Creates a column index.
/// </summary>
/// <param name="index">Value stored in <c>Index</c>.</param>
/// <param name="mainContainer">Optional data set stored in <c>MainContainer</c>.</param>
/// <param name="parent">Optional parent column index stored in <c>Parent</c>.</param>
public ColumnIndex(int index, IDataSet mainContainer = null, IColumnIndex parent = null)
{
    // Assignment order is kept as-is, in case the property setters have side effects.
    this.Index = index;
    this.Parent = parent;
    this.MainContainer = mainContainer;
}
/// <summary>
/// Determines and assigns the number of split occurrences of the first split pattern of a column index.
/// By construction, the split pattern concerned is always the first element in the column split patterns list.
/// See SplitPattern.SplitOccurrences property setter, for details.
/// </summary>
/// <param name="columnIndex">Column index whose first split pattern is to be set up.</param>
/// <returns>
/// True when the split occurrences were assigned. False when <paramref name="columnIndex"/> is null,
/// when it has no split pattern, or (dynamic mode only) when no cell of that column carries a non-null
/// value to analyze.
/// </returns>
private bool SetColumnIndexSplitPatternOccurences(IColumnIndex columnIndex)
{
    if (columnIndex == null)
    {
        return false;
    }

    // A pattern must exist for the current column depth.
    var columnSplitPatterns = columnIndex.ColumnSplitPatterns;
    if (columnSplitPatterns == null || columnSplitPatterns.Count == 0)
    {
        return false;
    }

    var splitPattern = columnSplitPatterns[0];
    var separators = new string[] { splitPattern.Separator };

    if (!splitPattern.DynamicSplitOccurrences)
    {
        // This neutral self-assignment still goes through the setter which, per the original
        // author's note, forces Initialization to true.
        splitPattern.SplitOccurrences = splitPattern.SplitOccurrences;
        return true;
    }

    // Each key is a specific number of splitted items - 1 (by convention) found at least once, within the cells.
    // Each value is the number of occurrences found for that specific number.
    // E.g. for the list {"Mike", "John", "Wolfgang Amadeus", "Pablo Diego José Francisco"}, the dictionary gets the 3 following entries:
    // (0, 2)
    // (1, 1)
    // (3, 1)
    // Value is accordingly always > 0.
    // According to convention, the number of *splits* is the number of words - 1.
    var splitCountFrequencies = new Dictionary<int, int>();
    foreach (var cell in Cells)
    {
        // Cells with a null value cannot be split; they are skipped instead of raising a NullReferenceException.
        if (cell.CellColumnIndex == columnIndex && cell.Value != null)
        {
            var splitsCount = cell.Value.Split(separators, splitPattern.PatternSplitOptions).Length - 1; // !!! Convention.

            // A missing key leaves 'frequency' at 0, so one lookup covers both the "new" and "existing" cases.
            int frequency;
            splitCountFrequencies.TryGetValue(splitsCount, out frequency);
            splitCountFrequencies[splitsCount] = frequency + 1;
        }
    }

    // Nothing to analyze for this column: Max() below would throw on an empty sequence.
    if (splitCountFrequencies.Count == 0)
    {
        return false;
    }

    // Now, the most frequent count should be kept.
    // A strategy has to prevail if there are several split counts equally present (i.e. max does not occur for a single value).
    var highestFrequency = splitCountFrequencies.Values.Max();
    var mostFrequentSplitCounts = splitCountFrequencies
        .Where(e => e.Value == highestFrequency) // There can be more than one.
        .Select(e => e.Key)
        .ToList();

    splitPattern.SplitOccurrences =
        (splitPattern.SplitDisambiguationStrategyWhenMultipleMaxCardinalitiesValue == SplitDisambiguationStrategyWhenMultipleMaxCardinalities.KeepGreatest)
            ? mostFrequentSplitCounts.Max()
            : mostFrequentSplitCounts.Min();

    return true;
}
/// <summary>
/// Computes the likelihood that the cells of a column are of a given data type, and stores it in
/// <c>ColumnTypeLikelihood</c>. The likelihood is the sum, over the non-null cells of the column whose
/// row has a strictly positive weight, of (row weight × value likelihood), divided by the sum of all row weights.
/// </summary>
/// <param name="columnIndex">Column to process; when null, every entry of <c>ColumnIndexes</c> is processed.</param>
/// <param name="dataType">Data type to evaluate; when null, every entry of <c>DataTypes</c> is evaluated.</param>
public void InitializeColumnTypeLikelihood(IColumnIndex columnIndex = null, GenericDataType dataType = null)
{
    // Proceed with all columns, if null index.
    if (columnIndex == null)
    {
        // The requested data type is forwarded, so that a type-specific request
        // is not silently widened to every known type.
        ColumnIndexes.ForEach(i => InitializeColumnTypeLikelihood(i, dataType));
        Debug.WriteLine("DataSet.InitializeColumnTypeLikelihood, proceed all columns.");
        return;
    }

    // If unknown column or if no matching cell, return now, rather than later.
    if (!ColumnIndexes.Contains(columnIndex)
        || !Cells.Exists(c => c.CellColumnIndex == columnIndex))
    {
        return;
    }

    // Proceed with all types for the given column, if null type.
    if (dataType == null)
    {
        DataTypes.ForEach(t => InitializeColumnTypeLikelihood(columnIndex, t));
        Debug.WriteLine("DataSet.InitializeColumnTypeLikelihood, proceed all types.");
        return;
    }

    // If unknown type, return.
    if (!DataTypes.Contains(dataType))
    {
        return;
    }

    lock (_dataSetLock)
    {
        // Pay attention to strict implementation of Weight nullity for parent indexes.
        var rowsCardinality = RowIndexes.Sum(i => i.Weight);
        if (rowsCardinality < 1.0)
        {
            Debug.WriteLine("DataSet.InitializeColumnTypeLikelihood, insufficient cardinality.");
            return;
        }

        // Fetch, or create on first use, the per-type likelihood dictionary of the column (single lookup).
        Dictionary<GenericDataType, double?> columnLikelihoodValues;
        if (!ColumnTypeLikelihood.TryGetValue(columnIndex, out columnLikelihoodValues))
        {
            columnLikelihoodValues = new Dictionary<GenericDataType, double?>();
            ColumnTypeLikelihood.Add(columnIndex, columnLikelihoodValues);
            Debug.WriteLine("DataSet.InitializeColumnTypeLikelihood, new dictionary column entry.");
        }

        // The entry is registered (as null) before computing, so that it exists even if the computation fails.
        if (!columnLikelihoodValues.ContainsKey(dataType))
        {
            columnLikelihoodValues.Add(dataType, null);
            Debug.WriteLine("DataSet.InitializeColumnTypeLikelihood, new dictionary data type entry.");
        }

        // Compute likelihood.
        var likelihood = Cells
            .Where(c =>
                c.CellRowIndex != null
                && c.CellRowIndex.Weight > 0.0
                && c.CellColumnIndex == columnIndex
                && c.Value != null // Must not filter non-null empty strings.
            )
            .Sum(c => c.CellRowIndex.Weight * c.Value.LikelihoodToBeType(dataType))
            / rowsCardinality;

        columnLikelihoodValues[dataType] = likelihood;
        Debug.WriteLine("DataSet.InitializeColumnTypeLikelihood, likelihood for current column/type is " + likelihood);
    }
}
/// <summary>
/// Placeholder for the initialization of column class likelihoods.
/// </summary>
/// <remarks>
/// The body is currently empty: calling this method has no effect, and both parameters are ignored.
/// NOTE(review): presumably meant to mirror InitializeColumnTypeLikelihood for data classes — confirm before implementing.
/// </remarks>
/// <param name="columnIndex">Currently unused.</param>
/// <param name="dataClass">Currently unused.</param>
public void InitializeColumnClassLikelihood(IColumnIndex columnIndex = null, GenericDataClass dataClass = null) { }
/// <summary>
/// Sets up the split pattern occurrences of a column index, or of every column index when none is given.
/// </summary>
/// <param name="columnIndex">Column index to process; when null, every entry of <c>ColumnIndexes</c> is processed.</param>
public void ApplySplitPattern(IColumnIndex columnIndex = null)
{
    if (columnIndex != null)
    {
        // The split occurrences are evaluated while holding the data set lock.
        lock (_dataSetLock)
        {
            var occurrencesSet = SetColumnIndexSplitPatternOccurences(columnIndex);
            if (occurrencesSet)
            {
                // TODO: Create subsequent cells.
            }
        }

        // Apply recursively to child column indexes.
        return;
    }

    // No specific column requested: process each known column index in turn.
    ColumnIndexes.ForEach(index => ApplySplitPattern(index));
}
/// <summary>
/// Creates a cell located at the given row and column indexes.
/// </summary>
/// <param name="rowIndex">Row index stored in <c>CellRowIndex</c>.</param>
/// <param name="columnIndex">Column index stored in <c>CellColumnIndex</c>.</param>
/// <param name="value">Value stored in <c>Value</c>; defaults to the empty string.</param>
public Cell(IRowIndex rowIndex, IColumnIndex columnIndex, String value = "")
{
    // Assignment order is kept as-is, in case the property setters have side effects.
    this.Value = value;
    this.CellRowIndex = rowIndex;
    this.CellColumnIndex = columnIndex;
}