/// <summary>
/// Initializes a new instance from its already-computed parts.
/// </summary>
/// <param name="pivotedProperties">Value stored in <see cref="PivotedProperties"/>.</param>
/// <param name="rowHeaders">Property descriptors which are copied into <see cref="RowHeaders"/>.</param>
/// <param name="rowTransforms">Dictionary of transforms keyed by string; the reference is stored as-is, without copying.</param>
/// <param name="columnRoles">Dictionary of roles keyed by object; the reference is stored as-is, without copying.</param>
private ClusteredProperties(PivotedProperties pivotedProperties,
    IEnumerable<DataPropertyDescriptor> rowHeaders,
    Dictionary<string, ClusterRole.Transform> rowTransforms,
    Dictionary<object, ClusterRole> columnRoles)
{
    // The two lookup tables are kept by reference.
    this._rowTransforms = rowTransforms;
    this._columnRoles = columnRoles;

    // The header sequence is snapshotted into a list via ImmutableList.ValueOf.
    this.RowHeaders = ImmutableList.ValueOf(rowHeaders);
    this.PivotedProperties = pivotedProperties;
}
/// <summary>
/// Builds a <see cref="Clusterer"/> for the given report results.
/// </summary>
/// <remarks>
/// The item properties of <paramref name="reportResults"/> are pivoted into series groups and reordered.
/// If <paramref name="clusteringSpec"/> yields neither row values nor column values for those
/// properties, a default clustering spec is computed and used in its place.
/// </remarks>
/// <param name="cancellationToken">Token handed to the default-spec computation and to the Clusterer.</param>
/// <param name="clusteringSpec">The requested clustering specification.</param>
/// <param name="reportResults">The report whose rows and properties are to be clustered.</param>
/// <returns>
/// The new Clusterer, or null when the requested spec selects no values and no default
/// spec could be determined.
/// </returns>
public static Clusterer CreateClusterer(CancellationToken cancellationToken, ClusteringSpec clusteringSpec,
    ReportResults reportResults)
{
    var unpivoted = new PivotedProperties(reportResults.ItemProperties);
    var seriesGroups = unpivoted.CreateSeriesGroups();
    var pivoted = unpivoted.ChangeSeriesGroups(seriesGroups).ReorderItemProperties();

    var properties = ClusteredProperties.FromClusteringSpec(clusteringSpec, pivoted);
    bool hasAnyValues = properties.RowValues.Any() || properties.ColumnValues.Any();
    if (!hasAnyValues)
    {
        // Nothing usable in the requested spec: fall back to the default one.
        clusteringSpec = ClusteringSpec.GetDefaultClusteringSpec(cancellationToken, reportResults, pivoted);
        if (clusteringSpec == null)
        {
            return null;
        }

        properties = ClusteredProperties.FromClusteringSpec(clusteringSpec, pivoted);
    }

    var rowItems = reportResults.RowItems;
    // An unrecognized metric name falls back to the default metric.
    var metric = ClusterMetricType.FromName(clusteringSpec.DistanceMetric) ?? ClusterMetricType.DEFAULT;
    return new Clusterer(cancellationToken, rowItems, properties, metric);
}
/// <summary>
/// Creates a copy of this object in which the ItemProperties have been rearranged so that
/// the properties which do not belong to any series come first (in their existing relative order),
/// followed by the properties of each series group in turn.
/// Within a group the properties are emitted one pivot key at a time, so when a group
/// holds several series their properties alternate instead of each series being contiguous.
/// </summary>
/// <returns>
/// A new PivotedProperties holding the reordered item properties, with every series group
/// renumbered to point at the new positions.
/// </returns>
public PivotedProperties ReorderItemProperties()
{
    // Every property index that is claimed by some series.
    var indexesInSeries = new HashSet<int>();
    foreach (var seriesGroup in SeriesGroups)
    {
        foreach (var series in seriesGroup.SeriesList)
        {
            foreach (var propertyIndex in series.PropertyIndexes)
            {
                indexesInSeries.Add(propertyIndex);
            }
        }
    }

    // newOrder[newPosition] == old index of the property that ends up at newPosition.
    var newOrder = new List<int>();
    int propertyCount = ItemProperties.Count;
    for (int oldIndex = 0; oldIndex < propertyCount; oldIndex++)
    {
        if (!indexesInSeries.Contains(oldIndex))
        {
            newOrder.Add(oldIndex);
        }
    }

    foreach (var seriesGroup in SeriesGroups)
    {
        int pivotKeyCount = seriesGroup.PivotKeys.Count;
        for (int iPivotKey = 0; iPivotKey < pivotKeyCount; iPivotKey++)
        {
            // Taking the same pivot key from each series is what interleaves the series.
            foreach (var series in seriesGroup.SeriesList)
            {
                newOrder.Add(series.PropertyIndexes[iPivotKey]);
            }
        }
    }

    // Inverse mapping: newNumbering[old index] == new position.
    var newNumbering = new int[newOrder.Count];
    int nextPosition = 0;
    foreach (int oldIndex in newOrder)
    {
        newNumbering[oldIndex] = nextPosition;
        nextPosition++;
    }

    var reorderedProperties = new ItemProperties(newOrder.Select(index => ItemProperties[index]));
    var result = new PivotedProperties(reorderedProperties,
        SeriesGroups.Select(seriesGroup => seriesGroup.RenumberProperties(reorderedProperties, newNumbering)));
#if DEBUG
    // Sanity checks: same set of properties, same group/series shape, and each series
    // still refers to the same properties, now at ascending positions.
    Debug.Assert(ItemProperties.ToHashSet().SetEquals(result.ItemProperties.ToHashSet()));
    Debug.Assert(SeriesGroups.Count == result.SeriesGroups.Count);
    for (int iGroup = 0; iGroup < SeriesGroups.Count; iGroup++)
    {
        var originalGroup = SeriesGroups[iGroup];
        var reorderedGroup = result.SeriesGroups[iGroup];
        Debug.Assert(originalGroup.SeriesList.Count == reorderedGroup.SeriesList.Count);
        Debug.Assert(originalGroup.PivotKeys.SequenceEqual(reorderedGroup.PivotKeys));
        for (int iSeries = 0; iSeries < originalGroup.SeriesList.Count; iSeries++)
        {
            var reorderedSeries = reorderedGroup.SeriesList[iSeries];
            Debug.Assert(reorderedSeries.PropertyIndexes.OrderBy(index => index)
                .SequenceEqual(reorderedSeries.PropertyIndexes));
            var originalSeries = originalGroup.SeriesList[iSeries];
            Debug.Assert(originalSeries.PropertyIndexes.Select(index => ItemProperties[index])
                .SequenceEqual(reorderedSeries.PropertyIndexes.Select(index => result.ItemProperties[index])));
        }
    }
#endif
    return result;
}
/// <summary>
/// Chooses a clustering specification for a report when none usable has been supplied.
/// </summary>
/// <remarks>
/// Every series which can be expressed as a column reference becomes a value: a column header
/// if the report has at least MIN_ROWS_TO_ASSUME_HEADER rows and EqualValuesInAllRows reports true
/// for the series, otherwise a z-score transform for numeric series or a boolean transform for the rest.
/// The first ungrouped property which can be expressed as a column reference becomes the row header.
/// If the list then holds exactly one entry, the remaining numeric ungrouped properties
/// are added as raw values.
/// </remarks>
/// <param name="cancellationToken">Token passed through to EqualValuesInAllRows.</param>
/// <param name="reportResults">The report being clustered.</param>
/// <param name="pivotedProperties">The series groups and ungrouped properties of the report.</param>
/// <returns>
/// A spec using the Euclidean distance metric, or null if no values were found, or if the
/// single-entry fallback added nothing.
/// </returns>
public static ClusteringSpec GetDefaultClusteringSpec(CancellationToken cancellationToken,
    ReportResults reportResults, PivotedProperties pivotedProperties)
{
    var specs = new List<ValueSpec>();

    foreach (var group in pivotedProperties.SeriesGroups)
    {
        foreach (var series in group.SeriesList)
        {
            var seriesRef = ColumnRef.FromPivotedPropertySeries(series);
            if (seriesRef != null)
            {
                bool looksLikeHeader = reportResults.RowCount >= MIN_ROWS_TO_ASSUME_HEADER
                                       && EqualValuesInAllRows(cancellationToken, reportResults, series);
                if (!looksLikeHeader)
                {
                    var transform = ClusterRole.IsNumericType(series.PropertyType)
                        ? ClusterRole.ZSCORE
                        : ClusterRole.BOOLEAN;
                    specs.Add(new ValueSpec(seriesRef, transform));
                }
                else
                {
                    specs.Add(new ValueSpec(seriesRef, ClusterRole.COLUMNHEADER));
                }
            }
        }
    }

    using (var ungrouped = pivotedProperties.UngroupedProperties.GetEnumerator())
    {
        // Advance until the first property that yields a column reference; it becomes the row header
        // and is placed at the front of the list.
        bool rowHeaderFound = false;
        while (!rowHeaderFound && ungrouped.MoveNext())
        {
            var headerRef = ColumnRef.FromPropertyDescriptor(ungrouped.Current);
            if (headerRef != null)
            {
                specs.Insert(0, new ValueSpec(headerRef, ClusterRole.ROWHEADER));
                rowHeaderFound = true;
            }
        }

        // NOTE(review): a count of one is also reached when exactly one series value was added and
        // no row header was found; the enumerator is then exhausted and null is returned below.
        // Confirm whether that case is intended.
        if (specs.Count == 1)
        {
            // Continue from where the row header search stopped, taking numeric properties as raw values.
            while (ungrouped.MoveNext())
            {
                var candidate = ungrouped.Current;
                if (ClusterRole.IsNumericType(candidate?.PropertyType))
                {
                    var rawRef = ColumnRef.FromPropertyDescriptor(candidate);
                    if (rawRef != null)
                    {
                        specs.Add(new ValueSpec(rawRef, ClusterRole.RAW));
                    }
                }
            }

            if (specs.Count == 1)
            {
                return null;
            }
        }
    }

    return specs.Count == 0
        ? null
        : new ClusteringSpec(specs).ChangeDistanceMetric(ClusterMetricType.EUCLIDEAN.Name);
}
/// <summary>
/// Applies a clustering specification to a set of pivoted properties.
/// </summary>
/// <remarks>
/// Ungrouped properties whose role is ROWHEADER become row headers; those whose role is a
/// transform are recorded by property name. Series whose role is COLUMNHEADER or a transform
/// are recorded by series id. Properties and series with no column reference, no entry in the
/// spec, or any other role are ignored.
/// </remarks>
/// <param name="clusteringSpec">Supplies the role assigned to each column reference.</param>
/// <param name="pivotedProperties">The ungrouped properties and series groups to classify.</param>
/// <returns>A new ClusteredProperties describing the roles that were matched.</returns>
public static ClusteredProperties FromClusteringSpec(ClusteringSpec clusteringSpec,
    PivotedProperties pivotedProperties)
{
    var rolesByColumn = clusteringSpec.ToValueTransformDictionary();

    var headerProperties = new List<DataPropertyDescriptor>();
    var transformsByPropertyName = new Dictionary<string, ClusterRole.Transform>();
    foreach (var property in pivotedProperties.UngroupedProperties)
    {
        var propertyRef = ClusteringSpec.ColumnRef.FromPropertyDescriptor(property);
        if (propertyRef == null || !rolesByColumn.TryGetValue(propertyRef, out ClusterRole role))
        {
            continue;
        }

        if (role == ClusterRole.ROWHEADER)
        {
            headerProperties.Add(property);
            continue;
        }

        if (role is ClusterRole.Transform transform)
        {
            transformsByPropertyName.Add(property.Name, transform);
        }
    }

    var rolesBySeriesId = new Dictionary<object, ClusterRole>();
    foreach (var seriesGroup in pivotedProperties.SeriesGroups)
    {
        foreach (var series in seriesGroup.SeriesList)
        {
            var seriesRef = ToColumnRef(series);
            if (seriesRef == null || !rolesByColumn.TryGetValue(seriesRef, out ClusterRole role))
            {
                continue;
            }

            bool appliesToSeries = role == ClusterRole.COLUMNHEADER || role is ClusterRole.Transform;
            if (appliesToSeries)
            {
                rolesBySeriesId.Add(series.SeriesId, role);
            }
        }
    }

    return new ClusteredProperties(pivotedProperties, headerProperties, transformsByPropertyName,
        rolesBySeriesId);
}
/// <summary>
/// Returns a new ClusteredProperties which uses <paramref name="pivotedProperties"/> but keeps
/// this instance's row headers, row transforms and column roles.
/// The transform and role dictionaries are handed to the new instance's constructor by reference.
/// </summary>
/// <param name="pivotedProperties">The pivoted properties for the new instance.</param>
/// <returns>The new instance; this instance is not modified.</returns>
public ClusteredProperties ReplacePivotedProperties(PivotedProperties pivotedProperties)
    => new ClusteredProperties(pivotedProperties, RowHeaders, _rowTransforms, _columnRoles);