コード例 #1
0
        /// <summary>
        /// Creates a new column with the given identifier, an optional index and an optionally pre-sized field list.
        /// </summary>
        /// <param name="columnId">Identifier assigned to ColumnId.</param>
        /// <param name="index">Optional column index kept by this column; may be null.</param>
        /// <param name="initialSize">
        /// Optional initial capacity of the Fields list. When null, the list is created with its default capacity.
        /// </param>
        public ReactiveColumn(string columnId, IColumnIndex <T> index = null, int?initialSize = null)
        {
            _index = index;
            ColumnId = columnId;

            // Only pre-size the backing list when the caller supplied a capacity.
            if (initialSize.HasValue)
            {
                Fields = new List<T>(initialSize.Value);
            }
            else
            {
                Fields = new List<T>();
            }
        }
コード例 #2
0
ファイル: ColumnIndex.cs プロジェクト: romotchka/BabelMeta
 /// <summary>
 /// Tells whether the given index appears in this index's parent chain.
 /// </summary>
 /// <param name="ancestor">Candidate ancestor, compared to each parent with ==.</param>
 /// <returns>
 /// True when Parent equals <paramref name="ancestor"/>, or when Parent itself owns it as an ancestor.
 /// Note that a null Parent compared to a null <paramref name="ancestor"/> also yields true.
 /// </returns>
 public bool OwnsAsAncestor(IColumnIndex ancestor)
 {
     // Direct parent match first; otherwise delegate the question up the chain.
     return Parent == ancestor
         || (Parent != null && Parent.OwnsAsAncestor(ancestor));
 }
コード例 #3
0
ファイル: ColumnIndex.cs プロジェクト: romotchka/BabelMeta
 /// <summary>
 /// Creates a column index from its position, its optional owning data set and its optional parent index.
 /// </summary>
 /// <param name="index">Value assigned to Index.</param>
 /// <param name="mainContainer">Optional data set assigned to MainContainer; may be null.</param>
 /// <param name="parent">Optional parent column index assigned to Parent; may be null.</param>
 public ColumnIndex(int index, IDataSet mainContainer = null, IColumnIndex parent = null)
 {
     this.Index = index;
     this.Parent = parent;
     this.MainContainer = mainContainer;
 }
コード例 #4
0
ファイル: DataSet.cs プロジェクト: romotchka/BabelMeta
        /// <summary>
        /// Determines and assigns the split occurrences of the first split pattern of the given column.
        /// By construction, the split pattern concerned is always the first element in the column split patterns list.
        /// See SplitPattern.SplitOccurrences property setter, for details.
        /// </summary>
        /// <param name="columnIndex">Column whose first split pattern is to be updated.</param>
        /// <returns>
        /// True when the split occurrences were assigned. False when the column index is null, when it has no split
        /// pattern, or (dynamic occurrences only) when no cell with a non-null value belongs to the column.
        /// </returns>
        private bool SetColumnIndexSplitPatternOccurences(IColumnIndex columnIndex)
        {
            if (columnIndex == null)
            {
                return false;
            }

            // A pattern must exist for the current column depth.
            if (columnIndex.ColumnSplitPatterns == null || columnIndex.ColumnSplitPatterns.Count == 0)
            {
                return false;
            }

            var splitPattern = columnIndex.ColumnSplitPatterns[0];

            if (!splitPattern.DynamicSplitOccurrences)
            {
                splitPattern.SplitOccurrences = splitPattern.SplitOccurrences; // This neutral assignment yet forces Initialization to true.
                return true;
            }

            var separators = new String[] { splitPattern.Separator };

            // Each key is a specific number of splitted items - 1 (by convention) found at least once, within the cells.
            // Each value is the number of occurrences found for that specific number.
            // E.g. for the list {"Mike", "John", "Wolfgang Amadeus", "Pablo Diego José Francisco"}, the dictionary gets the 3 following entries:
            // (0, 2)
            // (1, 1)
            // (3, 1)
            // Value is accordingly always > 0.
            // According to convention, the number of *splits* is the number of words - 1.
            var columnSplitPatternCardinalityOccurrences = new Dictionary<int, int>();

            // Cells holding a null value cannot be split; they are skipped instead of raising a NullReferenceException.
            foreach (var cell in Cells.Where(c => c.CellColumnIndex == columnIndex && c.Value != null))
            {
                var splitsCount = cell.Value.Split(separators, splitPattern.PatternSplitOptions).Length - 1; // !!! Convention.

                int occurrences;
                columnSplitPatternCardinalityOccurrences.TryGetValue(splitsCount, out occurrences); // Leaves 0 when the key is absent.
                columnSplitPatternCardinalityOccurrences[splitsCount] = occurrences + 1;
            }

            // Without any usable cell there is nothing to count, and Max() below would throw on an empty sequence.
            if (columnSplitPatternCardinalityOccurrences.Count == 0)
            {
                return false;
            }

            // Now, the most frequent count should be kept.
            // A strategy has to prevail if there are several split counts equally present (i.e. max does not occur for a single value).
            var mostFrequentCardinalityOccurrencesCount =
                columnSplitPatternCardinalityOccurrences.Values.Max();

            var mostFrequentCardinalities = columnSplitPatternCardinalityOccurrences
                .Where(e => e.Value == mostFrequentCardinalityOccurrencesCount) // There can be more than one.
                .Select(e => e.Key)
                .ToList();

            splitPattern.SplitOccurrences =
                (splitPattern.SplitDisambiguationStrategyWhenMultipleMaxCardinalitiesValue ==
                 SplitDisambiguationStrategyWhenMultipleMaxCardinalities.KeepGreatest)
                    ? mostFrequentCardinalities.Max()
                    : mostFrequentCardinalities.Min();

            return true;
        }
コード例 #5
0
ファイル: DataSet.cs プロジェクト: romotchka/BabelMeta
        /// <summary>
        /// Computes the likelihood that a column holds values of a data type and stores it in ColumnTypeLikelihood.
        /// A null column index processes every column; a null data type processes every type for the given column.
        /// </summary>
        /// <param name="columnIndex">Column to evaluate, or null to evaluate every entry of ColumnIndexes.</param>
        /// <param name="dataType">Data type to evaluate, or null to evaluate every entry of DataTypes.</param>
        public void InitializeColumnTypeLikelihood(IColumnIndex columnIndex = null, GenericDataType dataType = null)
        {
            // No column given: fan out over every known column.
            if (columnIndex == null)
            {
                ColumnIndexes.ForEach(index => InitializeColumnTypeLikelihood(index));
                Debug.WriteLine("DataSet.InitializeColumnTypeLikelihood, proceed all columns.");
                return;
            }

            // Unknown column: leave now, rather than later.
            if (!ColumnIndexes.Contains(columnIndex))
            {
                return;
            }

            // Column without any matching cell: leave now as well.
            if (!Cells.Exists(cell => cell.CellColumnIndex == columnIndex))
            {
                return;
            }

            // No type given: fan out over every known type for this column.
            if (dataType == null)
            {
                DataTypes.ForEach(type => InitializeColumnTypeLikelihood(columnIndex, type));
                Debug.WriteLine("DataSet.InitializeColumnTypeLikelihood, proceed all types.");
                return;
            }

            // Unknown type: nothing to compute.
            if (!DataTypes.Contains(dataType))
            {
                return;
            }

            lock (_dataSetLock)
            {
                // Pay attention to strict implementation of Weight nullity for parent indexes.
                var rowsCardinality = RowIndexes.Sum(row => row.Weight);
                if (rowsCardinality < 1.0)
                {
                    Debug.WriteLine("DataSet.InitializeColumnTypeLikelihood, insufficient cardinality.");
                    return;
                }

                // Fetch the per-type dictionary of the column, creating it on first use.
                Dictionary<GenericDataType, double?> likelihoodsByType;
                if (!ColumnTypeLikelihood.TryGetValue(columnIndex, out likelihoodsByType))
                {
                    likelihoodsByType = new Dictionary<GenericDataType, double?>();
                    ColumnTypeLikelihood.Add(columnIndex, likelihoodsByType);
                    Debug.WriteLine("DataSet.InitializeColumnTypeLikelihood, new dictionary column entry.");
                }

                // Make sure the type has an entry before its value is written.
                if (!likelihoodsByType.ContainsKey(dataType))
                {
                    likelihoodsByType.Add(dataType, null);
                    Debug.WriteLine("DataSet.InitializeColumnTypeLikelihood, new dictionary data type entry.");
                }

                // Only cells of this column, on a positively weighted row, with a non-null value take part.
                var contributingCells = Cells
                    .Where(cell =>
                        cell.CellRowIndex != null && cell.CellRowIndex.Weight > 0.0
                        && cell.CellColumnIndex == columnIndex
                        && cell.Value != null // Must not filter non-null empty strings.
                        );

                // Row-weighted sum of the per-cell likelihoods, normalized by the total row weight.
                var weightedSum = contributingCells
                    .Sum(cell => cell.CellRowIndex.Weight * cell.Value.LikelihoodToBeType(dataType));
                var likelihood = weightedSum / rowsCardinality;

                likelihoodsByType[dataType] = likelihood;
                Debug.WriteLine("DataSet.InitializeColumnTypeLikelihood, likelihood for current column/type is " + likelihood);
            }
        }
コード例 #6
0
ファイル: DataSet.cs プロジェクト: romotchka/BabelMeta
 /// <summary>
 /// Placeholder for the per-class likelihood initialization of a column.
 /// The body is currently empty: calling this method performs no work.
 /// </summary>
 /// <param name="columnIndex">Column to evaluate; currently unused.</param>
 /// <param name="dataClass">Data class to evaluate; currently unused.</param>
 public void InitializeColumnClassLikelihood(IColumnIndex columnIndex = null, GenericDataClass dataClass = null)
 {
     // NOTE(review): not implemented yet.
 }
コード例 #7
0
ファイル: DataSet.cs プロジェクト: romotchka/BabelMeta
        /// <summary>
        /// Applies the split pattern of a column; a null column index applies it to every entry of ColumnIndexes.
        /// At present only the split occurrences of the column's pattern are set, under the data set lock.
        /// </summary>
        /// <param name="columnIndex">Column to process, or null to process all columns.</param>
        public void ApplySplitPattern(IColumnIndex columnIndex = null)
        {
            // No column given: process each known column in turn.
            if (columnIndex == null)
            {
                ColumnIndexes.ForEach(index => ApplySplitPattern(index));
                return;
            }

            lock (_dataSetLock)
            {
                var occurrencesAreSet = SetColumnIndexSplitPatternOccurences(columnIndex);
                if (occurrencesAreSet)
                {
                    // TODO: Create subsequent cells.
                }
            }

            // TODO: Apply recursively to child column indexes.
        }
コード例 #8
0
ファイル: Cell.cs プロジェクト: romotchka/BabelMeta
 /// <summary>
 /// Creates a cell located by a row index and a column index, holding the given text value.
 /// </summary>
 /// <param name="rowIndex">Row index assigned to CellRowIndex.</param>
 /// <param name="columnIndex">Column index assigned to CellColumnIndex.</param>
 /// <param name="value">Text assigned to Value; defaults to the empty string.</param>
 public Cell(IRowIndex rowIndex, IColumnIndex columnIndex, String value = "")
 {
     this.Value = value;
     this.CellRowIndex = rowIndex;
     this.CellColumnIndex = columnIndex;
 }