/// <summary>
/// Builds the absolute (count-based) confusion matrix for a set of single-label predictions.
/// </summary>
/// <remarks>
/// Predicted labels are stored in the columns.
/// Actual labels are stored in the rows.
/// Assumption: labels are stored as a natural range; that is, their values
/// lie in the half-open interval [0, <paramref name="classCount"/>).
/// </remarks>
/// <param name="actualLabels">Ground-truth labels, one per instance; every element is cast to <c>SingleLabel</c>.</param>
/// <param name="predictedLabels">Predicted labels, aligned by index with <paramref name="actualLabels"/>; every element is cast to <c>SingleLabel</c>.</param>
/// <param name="classCount">Number of classes; the resulting matrix has this many rows and columns.</param>
/// <returns>
/// A matrix in which cell (row, column) counts the instances whose actual label is
/// <c>row</c> and whose predicted label is <c>column</c>.
/// </returns>
/// <exception cref="InvalidOperationException">
/// Thrown when <paramref name="actualLabels"/> and <paramref name="predictedLabels"/> differ in length.
/// </exception>
public static Matrix<int> Create(Array<ILabel> actualLabels, Array<ILabel> predictedLabels, int classCount)
{
    if (actualLabels.Length != predictedLabels.Length)
    {
        // Same exception type as before (callers may catch it), but now with a diagnosable message.
        throw new InvalidOperationException(
            $"{nameof(actualLabels)} and {nameof(predictedLabels)} must have the same length " +
            $"(got {actualLabels.Length} and {predictedLabels.Length}).");
    }

    var absoluteConfusionMatrix = new MutableMatrix<int>(
        rowCount: classCount,
        columnCount: classCount);

    var instanceCount = actualLabels.Length;
    for (int instanceIndex = 0; instanceIndex < instanceCount; instanceIndex++)
    {
        // Direct casts: a label that is not a SingleLabel fails here with an InvalidCastException.
        var actual = ((SingleLabel)actualLabels[instanceIndex]).Value;
        var predicted = ((SingleLabel)predictedLabels[instanceIndex]).Value;

        // Increment the (actual, predicted) cell by one for this instance.
        var oldConfusionValue = absoluteConfusionMatrix.Get(
            rowIndex: actual,
            columnIndex: predicted);

        absoluteConfusionMatrix.Set(
            rowIndex: actual,
            columnIndex: predicted,
            oldConfusionValue + 1);
    }

    return absoluteConfusionMatrix.ToMatrix();
}
/// <summary>
/// Computes the distance between every ordered pair of rows of the given matrix.
/// </summary>
/// <param name="datasetInstaces">Matrix whose rows are the instances to compare.</param>
/// <returns>
/// A square matrix with one row and one column per input row; cell (i, j) holds the
/// result of <c>Euclidean</c> applied to input row i and input row j.
/// </returns>
public static Matrix<double> ComputeEuclideanDistanceMatrix(Matrix<float> datasetInstaces)
{
    var rowTotal = datasetInstaces.RowCount;
    var pairwise = new MutableMatrix<double>(rowTotal, rowTotal);

    // Fills one complete row of the result; each call writes only cells of its own row.
    void FillRow(int rowIndex)
    {
        var rowVector = datasetInstaces.GetRow(rowIndex);
        for (int columnIndex = 0; columnIndex < rowTotal; columnIndex++)
        {
            var otherVector = datasetInstaces.GetRow(columnIndex);
            pairwise.Set(rowIndex, columnIndex, Euclidean(rowVector, otherVector));
        }
    }

    // One parallel iteration per result row.
    Parallel.For(0, rowTotal, FillRow);

    return pairwise.ToMatrix();
}
public static Dataset CreateFromMutableObjects( FeatureType[] mutableFeatureTypes, MutableMatrix <float> mutableData, Array <ILabel> labels, bool isTrainDataset, ClassificationType classificationType ) { if (mutableFeatureTypes.Length != mutableData.ColumnCount) { throw new ArgumentException("featureTypes.Length must be equal to data.ColumnCount"); } if (mutableData.RowCount != labels.Length) { throw new ArgumentException("label.RowCount must be equal to data.RowCount"); } var featureTypes = mutableFeatureTypes.ToArray(); var data = mutableData.ToMatrix(); var dataTransposed = mutableData.Transpose().ToMatrix(); var featuresCount = featureTypes.Length; var sortedFeatureValues = new float[featuresCount][]; var sortedUniqueFeatureValues = new float[featuresCount][]; var featureValueFrequencies = new Dictionary <float, int> [featuresCount]; var dimensionIntervals = new IInterval[featuresCount]; var distanceMatrixTask = Task.Run(() => Distance.ComputeEuclideanDistanceMatrix(data)); Parallel.For(fromInclusive: 0, toExclusive: featuresCount, body: featureIndex => { var currentFeatureValues = dataTransposed.GetRow(featureIndex).ToArray(); ThrowIfDatasetContainsNonFiniteValues(currentFeatureValues); var sufv = currentFeatureValues .Distinct() .OrderBy(v => v) .ToArray(); sortedUniqueFeatureValues[featureIndex] = sufv; ThrowIfTrainDatasetContainsFeaturesWithSingleValue(isTrainDataset, featureIndex, sufv); sortedFeatureValues[featureIndex] = currentFeatureValues .OrderBy(v => v) .ToArray(); var counts = currentFeatureValues .GroupBy(v => v) .ToDictionary( keySelector: g => g.Key, elementSelector: g => g.Count()); featureValueFrequencies[featureIndex] = counts; }); int classCount = classificationType switch { ClassificationType.SingleLabel => labels.Distinct().Count(), ClassificationType.MultiLabel => ((MultiLabel)labels[0]).Values.Length, _ => throw CommonExceptions.UnknownClassificationType, }; var classFrequencies = classificationType switch { 
ClassificationType.SingleLabel => ComputeSingleLabelClassFrequencies(labels, classCount), ClassificationType.MultiLabel => ComputeMultiLabelClassFrequencies(labels, classCount), _ => throw CommonExceptions.UnknownClassificationType, }; var defaultLabel = ComputeDefaultLabel(labels, classificationType); Task.WaitAll(distanceMatrixTask); var distanceMatrix = distanceMatrixTask.Result; return(new Dataset( instanceCount: data.RowCount, featureCount: data.ColumnCount, classCount: classCount, classificationType: classificationType, instanceLabels: labels, featureTypes: featureTypes, data: data, distanceMatrix: distanceMatrix, classFrequencies: classFrequencies, defaultLabel: defaultLabel, sortedUniqueFeatureValues: sortedUniqueFeatureValues));