/// <summary>
/// Initializes the instance with the data type it is associated with.
/// </summary>
/// <param name="dataType">Value assigned as-is to <c>DataType</c>; no validation is performed here.</param>
protected GenericDataClass(GenericDataType dataType)
{
    DataType = dataType;
}
/// <summary>
/// Computes, for a column and a data type, the row-weighted likelihood that the
/// column's cell values are of that type, and stores it in <c>ColumnTypeLikelihood</c>.
/// </summary>
/// <param name="columnIndex">
/// Column to evaluate. When null, every entry of <c>ColumnIndexes</c> is processed
/// (restricted to <paramref name="dataType"/> when one is given).
/// </param>
/// <param name="dataType">
/// Data type to evaluate. When null, every entry of <c>DataTypes</c> is processed
/// for the column.
/// </param>
/// <remarks>
/// Returns without storing anything when the column is not in <c>ColumnIndexes</c>,
/// when no cell belongs to the column, when the type is not in <c>DataTypes</c>, or
/// when the summed row weights are below 1.0. Only the final single column/type
/// computation runs under <c>_dataSetLock</c>; the fan-out over columns and types
/// and the early membership checks happen outside the lock.
/// </remarks>
public void InitializeColumnTypeLikelihood(IColumnIndex columnIndex = null, GenericDataType dataType = null)
{
    // Proceed with all columns, if null index.
    if (columnIndex == null)
    {
        ColumnIndexes.ForEach(i =>
        {
            // A null entry would re-enter this branch and recurse without end.
            if (i != null)
            {
                // Forward the requested type so a type-restricted call stays restricted;
                // a null type still fans out over all types one level down.
                InitializeColumnTypeLikelihood(i, dataType);
            }
        });
        Debug.WriteLine("DataSet.InitializeColumnTypeLikelihood, proceed all columns.");
        return;
    }

    // If unknown column or if no matching cell, return now, rather than later.
    if (
        !ColumnIndexes.Contains(columnIndex)
        || !Cells.Exists(c => c.CellColumnIndex == columnIndex)
    )
    {
        return;
    }

    // Proceed with all types for the given column, if null type.
    if (dataType == null)
    {
        DataTypes.ForEach(t => InitializeColumnTypeLikelihood(columnIndex, t));
        Debug.WriteLine("DataSet.InitializeColumnTypeLikelihood, proceed all types.");
        return;
    }

    // If unknown type, return.
    if (!DataTypes.Contains(dataType))
    {
        return;
    }

    lock (_dataSetLock)
    {
        // Pay attention to strict implementation of Weight nullity for parent indexes.
        // The sum of row weights is the denominator of the likelihood below; the
        // threshold also rules out a division by zero.
        var rowsCardinality = RowIndexes.Sum(i => i.Weight);
        if (rowsCardinality < 1.0)
        {
            Debug.WriteLine("DataSet.InitializeColumnTypeLikelihood, insufficient cardinality.");
            return;
        }

        // Prepare likelihood entry in the nested dictionary (single lookup, create on miss).
        Dictionary<GenericDataType, double?> columnLikelihoodValues;
        if (!ColumnTypeLikelihood.TryGetValue(columnIndex, out columnLikelihoodValues))
        {
            columnLikelihoodValues = new Dictionary<GenericDataType, double?>();
            ColumnTypeLikelihood.Add(columnIndex, columnLikelihoodValues);
            Debug.WriteLine("DataSet.InitializeColumnTypeLikelihood, new dictionary column entry.");
        }

        // Register the type with a null placeholder first, so the entry exists even
        // if the likelihood computation below throws.
        if (!columnLikelihoodValues.ContainsKey(dataType))
        {
            columnLikelihoodValues.Add(dataType, null);
            Debug.WriteLine("DataSet.InitializeColumnTypeLikelihood, new dictionary data type entry.");
        }

        // Compute likelihood: weighted sum of per-cell likelihoods over the column's
        // cells attached to positively weighted rows, normalized by the total row weight.
        var likelihood = Cells
            .Where(c =>
                c.CellRowIndex != null
                && c.CellRowIndex.Weight > 0.0
                && c.CellColumnIndex == columnIndex
                && c.Value != null // Must not filter non-null empty strings.
            )
            .Sum(c =>
                c.CellRowIndex.Weight * c.Value.LikelihoodToBeType(dataType)
            ) / rowsCardinality;

        columnLikelihoodValues[dataType] = likelihood;
        Debug.WriteLine("DataSet.InitializeColumnTypeLikelihood, likelihood for current column/type is " + likelihood);
    }
}