Esempio n. 1
0
 /// <summary>
 /// Checks that constructing a <c>DistanceMatrix</c> with a <c>null</c>
 /// distance metric raises an <see cref="ArgumentNullException"/>.
 /// </summary>
 public void DistanceMatrixThrowArgumentNullExceptionItShouldOnDistanceMetric()
 {
     // Arrange: the generator is configured with a collection size of 111.
     collectionSize = 111;
     generatedDataCollection = new GenerateIdentifiableDataPointCollection(collectionSize);
     dataCollection = generatedDataCollection.Generate();

     // Act + Assert: the null metric must be rejected by the constructor.
     Assert.Catch<ArgumentNullException>(() => new DistanceMatrix(dataCollection, null));
 }
Esempio n. 2
0
        /// <summary>
        /// Runs k-means over four points carrying "Gender" and "Income"
        /// attributes and expects the three resulting clusters to hold
        /// 2, 1 and 1 members respectively.
        /// </summary>
        public void KMeansClusteringWorksOnTwoDimensions()
        {
            // Arrange
            var points = new IdentifiableDataPointCollection();
            var first  = new IdentifiableDataPoint(0, 2);
            var second = new IdentifiableDataPoint(1, 2);
            var third  = new IdentifiableDataPoint(2, 2);
            var fourth = new IdentifiableDataPoint(3, 2);

            first.AddAttribute("Gender", 1);
            first.AddAttribute("Income", 1);

            second.AddAttribute("Gender", 0);
            second.AddAttribute("Income", 0.1429);

            third.AddAttribute("Gender", 1);
            third.AddAttribute("Income", 0.2858);

            fourth.AddAttribute("Gender", 1);
            fourth.AddAttribute("Income", 1);

            points.AddItem(first);
            points.AddItem(second);
            points.AddItem(third);
            points.AddItem(fourth);

            // NOTE(review): the int array is presumably the ids of the points
            // used as initial centroids - confirm against the KMeans constructor.
            var clusterer = new KMeans(points, new [] { 0, 1, 2 }, new EuclideanMetric());

            // Act
            var outcome = clusterer.Calculate();

            // Assert: member counts per cluster, in cluster order.
            Assert.AreEqual(2, outcome.Clusters[0].Members.Count);
            Assert.AreEqual(1, outcome.Clusters[1].Members.Count);
            Assert.AreEqual(1, outcome.Clusters[2].Members.Count);
        }
Esempio n. 3
0
        /// <summary>
        /// Builds a distance matrix from a generated collection using the
        /// Euclidean metric and compares every cell with the matrix returned
        /// by <c>ExpectedMatrix()</c>. A cell passes when it differs from the
        /// expected value by less than 0.01 and is not negative.
        /// </summary>
        public void DistanceMatrixShouldReturnLargerCorrectDistanceMatrix()
        {
            // Arrange
            collectionSize          = 111;
            distanceMetric          = new EuclideanMetric();
            generatedDataCollection = new GenerateIdentifiableDataPointCollection(collectionSize);
            dataCollection          = generatedDataCollection.Generate();

            // Act
            distanceMatrix = new DistanceMatrix(dataCollection, distanceMetric);
            expectedMatrix = ExpectedMatrix();

            // Assert: walk the whole matrix and fail on the first bad cell.
            for (int row = 0; row < distanceMatrix.Rows; row++)
            {
                for (int col = 0; col < distanceMatrix.Columns; col++)
                {
                    double difference = distanceMatrix[row, col] - expectedMatrix[row, col];
                    bool withinTolerance = difference > -0.01 && difference < 0.01;

                    // Written as a positive check so that a NaN difference
                    // (for which every comparison is false) still fails.
                    if (withinTolerance && distanceMatrix[row, col] >= 0)
                    {
                        continue;
                    }

                    Assert.Fail("{0}, row = {1}, col = {2}", difference, row, col);
                }
            }
        }
Esempio n. 4
0
        /// <summary>
        /// Runs k-means over four points with a single "Gender" attribute
        /// (values 1, 0, 1, 1) and expects the centroid coordinates of
        /// cluster 0 to be { 1 } and those of cluster 1 to be { 0 }.
        /// </summary>
        public void KMeansShouldCalculateCorrectCentroidsInOneDimension()
        {
            // Arrange
            var points = new IdentifiableDataPointCollection();
            var first  = new IdentifiableDataPoint(0, 1);
            var second = new IdentifiableDataPoint(1, 1);
            var third  = new IdentifiableDataPoint(2, 1);
            var fourth = new IdentifiableDataPoint(3, 1);

            first.AddAttribute("Gender", 1);
            second.AddAttribute("Gender", 0);
            third.AddAttribute("Gender", 1);
            fourth.AddAttribute("Gender", 1);

            points.AddItem(first);
            points.AddItem(second);
            points.AddItem(third);
            points.AddItem(fourth);

            var clusterer = new KMeans(points, new [] { 0, 1 }, new EuclideanMetric());

            // Act
            var outcome = clusterer.Calculate();

            // Assert
            double[] expectedFirstCentroid  = { 1 };
            double[] expectedSecondCentroid = { 0 };

            Assert.AreEqual(expectedFirstCentroid, outcome.Clusters[0].Centroid.Coordinates);
            Assert.AreEqual(expectedSecondCentroid, outcome.Clusters[1].Centroid.Coordinates);
        }
Esempio n. 5
0
        /// <summary>
        /// Runs k-means over four points with a single "Gender" attribute and
        /// expects the three points with value 1 to land in cluster 0 and the
        /// one point with value 0 to land in cluster 1.
        /// </summary>
        public void KMeansShouldClusterDataPointsInOneDimensionCorrectly()
        {
            // Arrange
            var points = new IdentifiableDataPointCollection();
            var first  = new IdentifiableDataPoint(0, 1);
            var second = new IdentifiableDataPoint(1, 1);
            var third  = new IdentifiableDataPoint(2, 1);
            var fourth = new IdentifiableDataPoint(3, 1);

            first.AddAttribute("Gender", 1);
            second.AddAttribute("Gender", 0);
            third.AddAttribute("Gender", 1);
            fourth.AddAttribute("Gender", 1);

            points.AddItem(first);
            points.AddItem(second);
            points.AddItem(third);
            points.AddItem(fourth);

            var clusterer = new KMeans(points, new [] { 0, 1 }, new EuclideanMetric());

            // Act
            var outcome = clusterer.Calculate();

            // Unwrap each cluster's membership entries to the underlying points.
            var majorityCluster = outcome.Clusters[0].Members.Select(entry => entry.Member).ToArray();
            var minorityCluster = outcome.Clusters[1].Members.Select(entry => entry.Member).ToArray();

            // Assert: cluster 0 holds exactly the three "Gender = 1" points.
            Assert.AreEqual(3, majorityCluster.Length);
            Assert.Contains(first, majorityCluster);
            Assert.Contains(third, majorityCluster);
            Assert.Contains(fourth, majorityCluster);

            // Assert: cluster 1 holds only the "Gender = 0" point.
            Assert.AreEqual(1, minorityCluster.Length);
            Assert.Contains(second, minorityCluster);
        }
Esempio n. 6
0
        /// <summary>
        /// Imports <c>Resources.SampleData</c> through a
        /// <c>CsvDataImporter</c>, using the configuration returned by
        /// <c>SetupMockConfiguration()</c>, and stores the result in
        /// <c>dataSet</c>.
        /// </summary>
        public void SetUp()
        {
            var configuration = SetupMockConfiguration();
            var sampleReader  = new StringReader(Resources.SampleData);

            var csvImporter = new CsvDataImporter(sampleReader, configuration);
            dataSet = csvImporter.Run();
        }
Esempio n. 7
0
        /// <summary>
        /// Completion handler for the data import task: calls
        /// <c>ToggleUserInputControls(true)</c>, stores the imported
        /// collection in <c>Result</c> and shows it in a new
        /// <c>DataVisualizationForm</c>.
        /// </summary>
        /// <param name="task">The import task whose result is read.</param>
        private void DataImportTask_Complete(Task<IdentifiableDataPointCollection> task)
        {
            ToggleUserInputControls(true);

            // Reading Result on a faulted task rethrows its failure wrapped
            // in an AggregateException.
            Result = task.Result;

            var visualization = new DataVisualizationForm(Result);
            visualization.Show();
        }
Esempio n. 8
0
        /// <summary>
        /// Creates the form for the given data set. The parameterless
        /// constructor runs first (via <c>: this()</c>); afterwards the data
        /// set is stored, the distance metric is selected and a
        /// <c>DataConversionTask</c> is created with this form's success and
        /// failure handlers attached.
        /// </summary>
        /// <param name="dataSet">The data point collection the form works on.</param>
        public DataVisualizationForm(IdentifiableDataPointCollection dataSet)
            : this()
        {
            this.dataSet   = dataSet;
            // NOTE(review): DistanceMetric(...) is defined elsewhere; it presumably
            // maps the currently selected matrix option to a metric - confirm.
            distanceMetric = DistanceMetric(currentDistanceMatrix);

            // Subscribe to both outcomes of the conversion task.
            dataConversionTask          = new DataConversionTask();
            dataConversionTask.Success += DataConversionTask_Success;
            dataConversionTask.Failure += DataConversionTask_Failure;
        }
Esempio n. 9
0
        /// <summary>
        /// Builds a distance matrix for <paramref name="dataCollection"/>,
        /// runs the local-outlier-factor detection on it and prints one
        /// console line per returned entry with its ID and factor.
        /// </summary>
        /// <param name="kNeighbours">Neighbour count passed to <c>LocalOutlierFactor</c>.</param>
        /// <param name="dataCollection">The data points to analyse.</param>
        /// <param name="distanceMetric">The metric used to build the distance matrix.</param>
        static void LocalOutlierFactorD(int kNeighbours, IdentifiableDataPointCollection dataCollection, IDistanceMetric distanceMetric)
        {
            var distances = new DistanceMatrix(dataCollection, distanceMetric);
            var detector  = new LocalOutlierFactor(distances, kNeighbours);

            foreach (var scored in detector.Run())
            {
                Console.WriteLine("Person: {0} has the Local Outlier Factor of {1}", scored.ID, scored.LocalOutlierFactor);
            }
        }
Esempio n. 10
0
        /// <summary>
        /// Runs k-means over four points with five attributes each ("Gender",
        /// "Income", "Age", "Purchase", "Control") and checks the centroid
        /// coordinates of the three resulting clusters.
        /// </summary>
        public void KMeansCentroidsArePlacedRightInFiveDimensions()
        {
            // Arrange
            var points = new IdentifiableDataPointCollection();
            var first  = new IdentifiableDataPoint(0, 5);
            var second = new IdentifiableDataPoint(1, 5);
            var third  = new IdentifiableDataPoint(2, 5);
            var fourth = new IdentifiableDataPoint(3, 5);

            first.AddAttribute("Gender", 1);
            first.AddAttribute("Income", 1);
            first.AddAttribute("Age", 0.16);
            first.AddAttribute("Purchase", 0.5);
            first.AddAttribute("Control", 1);

            second.AddAttribute("Gender", 0);
            second.AddAttribute("Income", 0.1429);
            second.AddAttribute("Age", 0.16);
            second.AddAttribute("Purchase", 1);
            second.AddAttribute("Control", 0);

            third.AddAttribute("Gender", 1);
            third.AddAttribute("Income", 0.2858);
            third.AddAttribute("Age", 0.16);
            third.AddAttribute("Purchase", 1);
            third.AddAttribute("Control", 1);

            fourth.AddAttribute("Gender", 1);
            fourth.AddAttribute("Income", 1);
            fourth.AddAttribute("Age", 0.16);
            fourth.AddAttribute("Purchase", 1);
            fourth.AddAttribute("Control", 0.5);

            points.AddItem(first);
            points.AddItem(second);
            points.AddItem(third);
            points.AddItem(fourth);

            var clusterer = new KMeans(points, new [] { 0, 1, 2 }, new EuclideanMetric());

            // Act
            var outcome = clusterer.Calculate();

            // Assert: expected centroid coordinates, listed in cluster order.
            double[][] expectedCentroids =
            {
                new double[] { 1, 1, 0.16, 0.75, 0.75 },
                new double[] { 0, 0.1429, 0.16, 1, 0 },
                new double[] { 1, 0.2858, 0.16, 1, 1 },
            };

            for (int clusterIndex = 0; clusterIndex < expectedCentroids.Length; clusterIndex++)
            {
                Assert.AreEqual(expectedCentroids[clusterIndex], outcome.Clusters[clusterIndex].Centroid.Coordinates);
            }
        }
Esempio n. 11
0
        /// <summary>
        /// Reads every record from <c>reader</c> with a <c>CsvReader</c>
        /// configured to use <c>FieldDelimiter</c>, converts each record with
        /// <c>ParseRow</c> and collects the results.
        /// </summary>
        /// <returns>
        /// The collection of parsed data points. Each point is parsed with a
        /// sequential id that starts at 0.
        /// </returns>
        public IdentifiableDataPointCollection Run()
        {
            var imported  = new IdentifiableDataPointCollection();
            var csvReader = new CsvReader(reader);

            csvReader.Configuration.Delimiter = FieldDelimiter;

            // The loop counter doubles as the id handed to ParseRow.
            for (var nextId = 0; csvReader.Read(); nextId++)
            {
                IdentifiableDataPoint parsed = ParseRow(csvReader, nextId);
                imported.AddItem(parsed);
            }

            return imported;
        }
Esempio n. 12
0
        /// <summary>
        /// Computes the multi-dimensional-scaling result for
        /// <paramref name="dataCollection"/> and writes a small table to
        /// <paramref name="writer"/>: a header row of column letters followed
        /// by two coordinate rows labelled X and Y. At most 20 columns are
        /// printed, and never more than the result matrix contains.
        /// </summary>
        /// <param name="writer">The writer that receives the table.</param>
        /// <param name="dataCollection">The data points to scale.</param>
        /// <param name="distanceMetric">The metric used to build the distance matrix.</param>
        static void MultiDimensionalScaling(StreamWriter writer, IdentifiableDataPointCollection dataCollection, IDistanceMetric distanceMetric)
        {
            writer.WriteLine("MDS coordinates");

            DistanceMatrix distanceMatrix = new DistanceMatrix(dataCollection, distanceMetric);

            var    mds         = new MultiDimensionalScaling(distanceMatrix);
            Matrix coordinates = mds.Calculate();
            int    limiter     = 20;

            // Never print more columns than the result actually has.
            if (limiter > coordinates.Columns)
            {
                limiter = coordinates.Columns;
            }

            char letter = 'A';

            // Print the column header.
            writer.Write("    ");
            for (int columnIndex = 0; columnIndex < limiter; columnIndex++, letter++)
            {
                // Wrap back to 'A' once the alphabet is exhausted. The previous
                // check (letter >= 'z') would have run through '[', '\' and the
                // lowercase letters before wrapping.
                if (letter > 'Z')
                {
                    letter = 'A';
                }
                writer.Write("  {0}  |", letter);
            }
            writer.WriteLine();

            // Print the coordinates: one row for X, one for Y.
            letter = 'X';
            for (int rowIndex = 0; rowIndex < 2; rowIndex++)
            {
                writer.Write(" {0} |", letter++);
                for (int columnIndex = 0; columnIndex < limiter; columnIndex++)
                {
                    writer.Write("{0,5:N2}|", coordinates[rowIndex, columnIndex]);
                }
                writer.Write("\r\n");
            }
            writer.WriteLine("\r\n");
        }
Esempio n. 13
0
        /// <summary>
        /// Builds a data set in which points 0 and 1 carry identical
        /// attribute values and expects the <c>KMeans</c> constructor to throw
        /// an <see cref="InvalidOperationException"/> when given
        /// <c>{ 0, 1 }</c>.
        /// </summary>
        public void CentroidsHaveDistinctValues()
        {
            // Arrange: two identical points plus one that differs.
            var points     = new IdentifiableDataPointCollection();
            var duplicateA = new IdentifiableDataPoint(0, 2);
            var duplicateB = new IdentifiableDataPoint(1, 2);
            var different  = new IdentifiableDataPoint(2, 2);

            duplicateA.AddAttribute("Gender", 1);
            duplicateA.AddAttribute("Income", 1);

            duplicateB.AddAttribute("Gender", 1);
            duplicateB.AddAttribute("Income", 1);

            different.AddAttribute("Gender", 0);
            different.AddAttribute("Income", 0);

            points.AddItem(duplicateA);
            points.AddItem(duplicateB);
            points.AddItem(different);

            // Act + Assert
            // NOTE(review): { 0, 1 } presumably selects the two identical points
            // as initial centroids - confirm against the KMeans constructor.
            Assert.Throws<InvalidOperationException>(
                () => new KMeans(points, new [] { 0, 1 }, new EuclideanMetric()));
        }
Esempio n. 14
0
        /// <summary>
        /// Expects the <c>KMeans</c> constructor to throw an
        /// <see cref="ArgumentException"/> when it is given 0 as its second
        /// argument for a data set of four one-attribute points.
        /// </summary>
        public void CalculateWithZeroClusters()
        {
            // Arrange
            var points = new IdentifiableDataPointCollection();
            var first  = new IdentifiableDataPoint(0, 1);
            var second = new IdentifiableDataPoint(1, 1);
            var third  = new IdentifiableDataPoint(2, 1);
            var fourth = new IdentifiableDataPoint(3, 1);

            first.AddAttribute("Gender", 1);
            second.AddAttribute("Gender", 0);
            third.AddAttribute("Gender", 1);
            fourth.AddAttribute("Gender", 1);

            points.AddItem(first);
            points.AddItem(second);
            points.AddItem(third);
            points.AddItem(fourth);

            // Act + Assert
            Assert.Throws<ArgumentException>(
                () => new KMeans(points, 0, new EuclideanMetric()));
        }
Esempio n. 15
0
        /// <summary>
        /// Builds the distance matrix for <paramref name="dataCollection"/>
        /// and writes its top-left corner to <paramref name="writer"/> as a
        /// table with lettered rows and columns. At most 20 rows and columns
        /// are printed, and never more than the matrix contains.
        /// </summary>
        /// <param name="writer">The writer that receives the table.</param>
        /// <param name="dataCollection">The data points to measure.</param>
        /// <param name="distanceMetric">The metric used to build the distance matrix.</param>
        static void DistanceMatrix(StreamWriter writer, IdentifiableDataPointCollection dataCollection, IDistanceMetric distanceMetric)
        {
            writer.WriteLine("MatrixFull");
            DistanceMatrix matrix = new DistanceMatrix(dataCollection, distanceMetric);

            // Clamp the printed size to the matrix so that collections with
            // fewer than 20 points do not index out of range.
            int limiter = 20;
            if (limiter > matrix.Rows)
            {
                limiter = matrix.Rows;
            }
            if (limiter > matrix.Columns)
            {
                limiter = matrix.Columns;
            }

            // Header: one letter per printed column, so the header always has
            // as many labels as the rows below have cells.
            char columnLetter = 'A';
            writer.Write("   |");
            for (int columnIndex = 0; columnIndex < limiter; columnIndex++)
            {
                writer.Write("  {0}  |", columnLetter++);
                if (columnLetter > 'Z')
                {
                    columnLetter = 'A';
                }
            }
            writer.WriteLine();

            // Body: row letter followed by the formatted distances.
            char rowLetter = 'A';
            for (int rowIndex = 0; rowIndex < limiter; rowIndex++)
            {
                writer.Write(" {0} |", rowLetter++);
                for (int columnIndex = 0; columnIndex < limiter; columnIndex++)
                {
                    writer.Write("{0,5:N2}|", matrix[rowIndex, columnIndex]);
                }

                // Wrap back to 'A' after 'Z' (the previous check compared
                // against lowercase 'z' and let non-letter labels through).
                if (rowLetter > 'Z')
                {
                    rowLetter = 'A';
                }
                writer.Write("\r\n");
            }
            writer.Write("\r\n\r\n");
        }