/// <summary>
/// Checks that building a <c>DistanceMatrix</c> with a <c>null</c> distance metric
/// is rejected with an <c>ArgumentNullException</c>.
/// </summary>
public void DistanceMatrixThrowArgumentNullExceptionItShouldOnDistanceMetric()
{
    // Arrange: produce a data collection from the generator, sized by collectionSize.
    collectionSize = 111;
    generatedDataCollection = new GenerateIdentifiableDataPointCollection(collectionSize);
    dataCollection = generatedDataCollection.Generate();

    // Act / Assert: the constructor must refuse the missing metric.
    Assert.Catch<ArgumentNullException>(() => new DistanceMatrix(dataCollection, null));
}
/// <summary>
/// Runs k-means over four points that each carry two attributes ("Gender" and
/// "Income") and checks the member count of the three resulting clusters.
/// </summary>
public void KMeansClusteringWorksOnTwoDimensions()
{
    var points = new IdentifiableDataPointCollection();

    var first = new IdentifiableDataPoint(0, 2);
    var second = new IdentifiableDataPoint(1, 2);
    var third = new IdentifiableDataPoint(2, 2);
    var fourth = new IdentifiableDataPoint(3, 2);

    first.AddAttribute("Gender", 1);
    first.AddAttribute("Income", 1);
    points.AddItem(first);

    second.AddAttribute("Gender", 0);
    second.AddAttribute("Income", 0.1429);
    points.AddItem(second);

    third.AddAttribute("Gender", 1);
    third.AddAttribute("Income", 0.2858);
    points.AddItem(third);

    fourth.AddAttribute("Gender", 1);
    fourth.AddAttribute("Income", 1);
    points.AddItem(fourth);

    // The int array is presumably the indices of the points used as initial
    // centroids — confirm against the KMeans constructor.
    var clustering = new KMeans(points, new[] { 0, 1, 2 }, new EuclideanMetric());
    var outcome = clustering.Calculate();

    // The first and fourth points have identical attributes, so one cluster holds two members.
    Assert.AreEqual(2, outcome.Clusters[0].Members.Count);
    Assert.AreEqual(1, outcome.Clusters[1].Members.Count);
    Assert.AreEqual(1, outcome.Clusters[2].Members.Count);
}
/// <summary>
/// Builds a Euclidean <c>DistanceMatrix</c> from a generated data collection and
/// compares every cell with the matrix returned by <c>ExpectedMatrix()</c>.
/// A cell passes when it differs from the expected value by less than 0.01 and is
/// not negative; the first cell that does not pass fails the test.
/// </summary>
public void DistanceMatrixShouldReturnLargerCorrectDistanceMatrix()
{
    collectionSize = 111;
    distanceMetric = new EuclideanMetric();
    generatedDataCollection = new GenerateIdentifiableDataPointCollection(collectionSize);
    dataCollection = generatedDataCollection.Generate();
    distanceMatrix = new DistanceMatrix(dataCollection, distanceMetric);
    expectedMatrix = ExpectedMatrix();

    for (int row = 0; row < distanceMatrix.Rows; row++)
    {
        for (int col = 0; col < distanceMatrix.Columns; col++)
        {
            double difference = distanceMatrix[row, col] - expectedMatrix[row, col];

            // Written as positive comparisons on purpose: a NaN difference makes
            // both comparisons false, so it falls through to the failure below.
            bool withinTolerance = difference < 0.01 && difference > -0.01;
            if (withinTolerance && distanceMatrix[row, col] >= 0)
            {
                continue;
            }

            Assert.Fail("{0}, row = {1}, col = {2}", difference, row, col);
        }
    }
}
/// <summary>
/// Runs k-means over four single-attribute points (three with "Gender" 1, one
/// with "Gender" 0) and checks that the two cluster centroids are {1} and {0}.
/// </summary>
public void KMeansShouldCalculateCorrectCentroidsInOneDimension()
{
    var points = new IdentifiableDataPointCollection();

    var first = new IdentifiableDataPoint(0, 1);
    var second = new IdentifiableDataPoint(1, 1);
    var third = new IdentifiableDataPoint(2, 1);
    var fourth = new IdentifiableDataPoint(3, 1);

    first.AddAttribute("Gender", 1);
    points.AddItem(first);

    second.AddAttribute("Gender", 0);
    points.AddItem(second);

    third.AddAttribute("Gender", 1);
    points.AddItem(third);

    fourth.AddAttribute("Gender", 1);
    points.AddItem(fourth);

    // The int array is presumably the indices of the initial centroids — confirm.
    var clustering = new KMeans(points, new[] { 0, 1 }, new EuclideanMetric());
    var outcome = clustering.Calculate();

    var expectedFirstCentroid = new double[] { 1 };
    var expectedSecondCentroid = new double[] { 0 };

    Assert.AreEqual(expectedFirstCentroid, outcome.Clusters[0].Centroid.Coordinates);
    Assert.AreEqual(expectedSecondCentroid, outcome.Clusters[1].Centroid.Coordinates);
}
/// <summary>
/// Runs k-means over four single-attribute points and checks cluster membership:
/// the three points with "Gender" 1 must share the first cluster and the single
/// point with "Gender" 0 must be alone in the second.
/// </summary>
public void KMeansShouldClusterDataPointsInOneDimensionCorrectly()
{
    var points = new IdentifiableDataPointCollection();

    var first = new IdentifiableDataPoint(0, 1);
    var second = new IdentifiableDataPoint(1, 1);
    var third = new IdentifiableDataPoint(2, 1);
    var fourth = new IdentifiableDataPoint(3, 1);

    first.AddAttribute("Gender", 1);
    points.AddItem(first);

    second.AddAttribute("Gender", 0);
    points.AddItem(second);

    third.AddAttribute("Gender", 1);
    points.AddItem(third);

    fourth.AddAttribute("Gender", 1);
    points.AddItem(fourth);

    // The int array is presumably the indices of the initial centroids — confirm.
    var clustering = new KMeans(points, new[] { 0, 1 }, new EuclideanMetric());
    var outcome = clustering.Calculate();

    // Unwrap the cluster entries to the data points they refer to.
    var firstClusterPoints = outcome.Clusters[0].Members.Select(entry => entry.Member).ToArray();
    var secondClusterPoints = outcome.Clusters[1].Members.Select(entry => entry.Member).ToArray();

    Assert.AreEqual(3, firstClusterPoints.Length);
    Assert.Contains(first, firstClusterPoints);
    Assert.Contains(third, firstClusterPoints);
    Assert.Contains(fourth, firstClusterPoints);

    Assert.AreEqual(1, secondClusterPoints.Length);
    Assert.Contains(second, secondClusterPoints);
}
/// <summary>
/// Prepares the shared <c>dataSet</c> by importing the embedded sample data
/// (<c>Resources.SampleData</c>) through a <c>CsvDataImporter</c> that uses the
/// mock configuration.
/// </summary>
public void SetUp()
{
    var config = SetupMockConfiguration();

    // The reader is only needed while the import runs, so it is scoped to a
    // using block and released as soon as the data set has been produced.
    // NOTE(review): assumes Run() reads the input eagerly — confirm.
    using (var reader = new StringReader(Resources.SampleData))
    {
        var importer = new CsvDataImporter(reader, config);
        dataSet = importer.Run();
    }
}
/// <summary>
/// Handles the end of the data import: re-enables the user input controls, stores
/// the imported collection in <c>Result</c> and opens a visualization form for it.
/// </summary>
/// <param name="task">The import task whose result is shown.</param>
private void DataImportTask_Complete(Task<IdentifiableDataPointCollection> task)
{
    ToggleUserInputControls(true);

    // NOTE(review): reading task.Result rethrows if the task faulted — presumably
    // this handler is only invoked for successfully completed tasks; confirm.
    Result = task.Result;

    new DataVisualizationForm(Result).Show();
}
/// <summary>
/// Creates the visualization form for the given data set. After the parameterless
/// constructor has run, it stores the data set, resolves the distance metric from
/// <c>currentDistanceMatrix</c> and wires up the data conversion task callbacks.
/// </summary>
/// <param name="dataSet">The data points to visualize.</param>
public DataVisualizationForm(IdentifiableDataPointCollection dataSet)
    : this()
{
    this.dataSet = dataSet;
    distanceMetric = DistanceMetric(currentDistanceMatrix);

    // Subscribe to both outcomes of the conversion task through a local alias.
    var conversionTask = new DataConversionTask();
    dataConversionTask = conversionTask;
    conversionTask.Success += DataConversionTask_Success;
    conversionTask.Failure += DataConversionTask_Failure;
}
/// <summary>
/// Runs Local Outlier Factor detection on the data collection and prints one
/// console line per returned entry with its ID and outlier factor.
/// </summary>
/// <param name="kNeighbours">Neighbour count passed to the detector.</param>
/// <param name="dataCollection">The data points to analyse.</param>
/// <param name="distanceMetric">Metric used to build the distance matrix.</param>
static void LocalOutlierFactorD(int kNeighbours, IdentifiableDataPointCollection dataCollection, IDistanceMetric distanceMetric)
{
    var distances = new DistanceMatrix(dataCollection, distanceMetric);
    var detector = new LocalOutlierFactor(distances, kNeighbours);

    foreach (var entry in detector.Run())
    {
        Console.WriteLine("Person: {0} has the Local Outlier Factor of {1}", entry.ID, entry.LocalOutlierFactor);
    }
}
/// <summary>
/// Runs k-means over four points with five attributes each and checks the
/// centroid of each of the three resulting clusters.
/// </summary>
public void KMeansCentroidsArePlacedRightInFiveDimensions()
{
    var points = new IdentifiableDataPointCollection();

    var first = new IdentifiableDataPoint(0, 5);
    var second = new IdentifiableDataPoint(1, 5);
    var third = new IdentifiableDataPoint(2, 5);
    var fourth = new IdentifiableDataPoint(3, 5);

    first.AddAttribute("Gender", 1);
    first.AddAttribute("Income", 1);
    first.AddAttribute("Age", 0.16);
    first.AddAttribute("Purchase", 0.5);
    first.AddAttribute("Control", 1);
    points.AddItem(first);

    second.AddAttribute("Gender", 0);
    second.AddAttribute("Income", 0.1429);
    second.AddAttribute("Age", 0.16);
    second.AddAttribute("Purchase", 1);
    second.AddAttribute("Control", 0);
    points.AddItem(second);

    third.AddAttribute("Gender", 1);
    third.AddAttribute("Income", 0.2858);
    third.AddAttribute("Age", 0.16);
    third.AddAttribute("Purchase", 1);
    third.AddAttribute("Control", 1);
    points.AddItem(third);

    fourth.AddAttribute("Gender", 1);
    fourth.AddAttribute("Income", 1);
    fourth.AddAttribute("Age", 0.16);
    fourth.AddAttribute("Purchase", 1);
    fourth.AddAttribute("Control", 0.5);
    points.AddItem(fourth);

    // The int array is presumably the indices of the initial centroids — confirm.
    var clustering = new KMeans(points, new[] { 0, 1, 2 }, new EuclideanMetric());
    var outcome = clustering.Calculate();

    // Component-wise mean of the first and fourth points.
    var meanOfFirstAndFourth = new double[] { 1, 1, 0.16, 0.75, 0.75 };
    // Same values as the second point's attributes.
    var secondPointAlone = new double[] { 0, 0.1429, 0.16, 1, 0 };
    // Same values as the third point's attributes.
    var thirdPointAlone = new double[] { 1, 0.2858, 0.16, 1, 1 };

    Assert.AreEqual(meanOfFirstAndFourth, outcome.Clusters[0].Centroid.Coordinates);
    Assert.AreEqual(secondPointAlone, outcome.Clusters[1].Centroid.Coordinates);
    Assert.AreEqual(thirdPointAlone, outcome.Clusters[2].Centroid.Coordinates);
}
/// <summary>
/// Reads every record from the CSV input and returns the parsed data points as a
/// collection. Records receive consecutive ids starting at 0, in reading order.
/// </summary>
/// <returns>A collection with one data point per CSV record read.</returns>
public IdentifiableDataPointCollection Run()
{
    var parsedPoints = new IdentifiableDataPointCollection();

    // NOTE(review): the CsvReader is not disposed here; presumably whoever
    // supplied the underlying reader owns its lifetime — confirm.
    var csv = new CsvReader(reader);
    csv.Configuration.Delimiter = FieldDelimiter;

    for (var nextId = 0; csv.Read(); nextId++)
    {
        parsedPoints.AddItem(ParseRow(csv, nextId));
    }

    return parsedPoints;
}
/// <summary>
/// Runs multi-dimensional scaling on the data collection and writes the first two
/// rows of the result (labelled X and Y) to <paramref name="writer"/> as a table,
/// limited to at most 20 columns.
/// </summary>
/// <param name="writer">Destination for the formatted table.</param>
/// <param name="dataCollection">The data points to scale.</param>
/// <param name="distanceMetric">Metric used to build the distance matrix.</param>
static void MultiDimensionalScaling(StreamWriter writer, IdentifiableDataPointCollection dataCollection, IDistanceMetric distanceMetric)
{
    writer.WriteLine("MDS coordinates");

    var distances = new DistanceMatrix(dataCollection, distanceMetric);
    var scaling = new MultiDimensionalScaling(distances);
    Matrix coordinates = scaling.Calculate();

    // Never print more columns than the result has, and never more than 20.
    int columnCount = 20 > coordinates.Columns ? coordinates.Columns : 20;

    // Header row: one letter label per printed column.
    writer.Write(" ");
    char columnLabel = 'A';
    for (int column = 0; column < columnCount; column++)
    {
        // NOTE(review): the wrap compares against lowercase 'z'; it cannot
        // trigger while the column cap is 20 — confirm the intended bound.
        if (columnLabel >= 'z')
        {
            columnLabel = 'A';
        }

        writer.Write(" {0} |", columnLabel);
        columnLabel++;
    }

    writer.WriteLine();

    // Coordinate rows: row 0 is labelled X, row 1 is labelled Y.
    for (int axis = 0; axis < 2; axis++)
    {
        char axisLabel = (char)('X' + axis);
        writer.Write(" {0} |", axisLabel);

        for (int column = 0; column < columnCount; column++)
        {
            writer.Write("{0,5:N2}|", coordinates[axis, column]);
        }

        writer.Write("\r\n");
    }

    writer.WriteLine("\r\n");
}
/// <summary>
/// Checks that constructing <c>KMeans</c> with the index array {0, 1} throws an
/// <c>InvalidOperationException</c> when the points at those indices carry
/// identical attribute values.
/// </summary>
public void CentroidsHaveDistinctValues()
{
    var points = new IdentifiableDataPointCollection();

    var first = new IdentifiableDataPoint(0, 2);
    var second = new IdentifiableDataPoint(1, 2);
    var third = new IdentifiableDataPoint(2, 2);

    // The first two points are deliberately identical.
    first.AddAttribute("Gender", 1);
    first.AddAttribute("Income", 1);
    points.AddItem(first);

    second.AddAttribute("Gender", 1);
    second.AddAttribute("Income", 1);
    points.AddItem(second);

    third.AddAttribute("Gender", 0);
    third.AddAttribute("Income", 0);
    points.AddItem(third);

    // The int array is presumably the indices of the initial centroids — confirm.
    Assert.Throws<InvalidOperationException>(
        () => new KMeans(points, new[] { 0, 1 }, new EuclideanMetric()));
}
/// <summary>
/// Checks that constructing <c>KMeans</c> with 0 as its second argument
/// (presumably the requested cluster count) throws an <c>ArgumentException</c>.
/// </summary>
public void CalculateWithZeroClusters()
{
    var points = new IdentifiableDataPointCollection();

    var first = new IdentifiableDataPoint(0, 1);
    var second = new IdentifiableDataPoint(1, 1);
    var third = new IdentifiableDataPoint(2, 1);
    var fourth = new IdentifiableDataPoint(3, 1);

    first.AddAttribute("Gender", 1);
    points.AddItem(first);

    second.AddAttribute("Gender", 0);
    points.AddItem(second);

    third.AddAttribute("Gender", 1);
    points.AddItem(third);

    fourth.AddAttribute("Gender", 1);
    points.AddItem(fourth);

    Assert.Throws<ArgumentException>(
        () => new KMeans(points, 0, new EuclideanMetric()));
}
/// <summary>
/// Builds the distance matrix for the data collection and writes its top-left
/// corner (at most 20 rows by 20 columns) to <paramref name="writer"/> as a
/// table with one letter label per row.
/// </summary>
/// <param name="writer">Destination for the formatted table.</param>
/// <param name="dataCollection">The data points to measure.</param>
/// <param name="distanceMetric">Metric used to build the distance matrix.</param>
static void DistanceMatrix(StreamWriter writer, IdentifiableDataPointCollection dataCollection, IDistanceMetric distanceMetric)
{
    writer.WriteLine("MatrixFull");
    DistanceMatrix matrix = new DistanceMatrix(dataCollection, distanceMetric);

    int limiter = 20; // For the full matrix, use matrix.Rank.
    char letter = 'A';

    // NOTE(review): the header labels only columns A-J although up to 20
    // columns are printed per row — confirm whether it should be generated.
    writer.WriteLine(" | A | B | C | D | E | F | G | H | I | J |");

    // Both loops are also bounded by the matrix itself, so a data set with
    // fewer than 20 points no longer indexes past the end of the matrix.
    for (int rowIndex = 0; rowIndex < limiter && rowIndex < matrix.Rows; rowIndex++)
    {
        writer.Write(" {0} |", letter++);

        for (int columnIndex = 0; columnIndex < limiter && columnIndex < matrix.Columns; columnIndex++)
        {
            writer.Write("{0,5:N2}|", matrix[rowIndex, columnIndex]);
        }

        // NOTE(review): the wrap compares against lowercase 'z'; it cannot
        // trigger while the row cap is 20 — confirm the intended bound.
        if (letter >= 'z')
        {
            letter = 'A';
        }

        writer.Write("\r\n");
    }

    writer.Write("\r\n\r\n");
}