Example #1
0
        // Runs KMeans over four two-attribute points (Gender, Income) and checks how many
        // members end up in each of the three resulting clusters.
        public void KMeansClusteringWorksOnTwoDimensions()
        {
            var points = new IdentifiableDataPointCollection();

            var first = new IdentifiableDataPoint(0, 2);
            first.AddAttribute("Gender", 1);
            first.AddAttribute("Income", 1);

            var second = new IdentifiableDataPoint(1, 2);
            second.AddAttribute("Gender", 0);
            second.AddAttribute("Income", 0.1429);

            var third = new IdentifiableDataPoint(2, 2);
            third.AddAttribute("Gender", 1);
            third.AddAttribute("Income", 0.2858);

            // Same attribute values as the first point.
            var fourth = new IdentifiableDataPoint(3, 2);
            fourth.AddAttribute("Gender", 1);
            fourth.AddAttribute("Income", 1);

            foreach (var point in new [] { first, second, third, fourth })
            {
                points.AddItem(point);
            }

            // NOTE(review): the int[] argument presumably lists the indices of the points
            // used as initial centroids - confirm against the KMeans constructor.
            var clustering = new KMeans(points, new [] { 0, 1, 2 }, new EuclideanMetric()).Calculate();

            Assert.AreEqual(2, clustering.Clusters[0].Members.Count);
            Assert.AreEqual(1, clustering.Clusters[1].Members.Count);
            Assert.AreEqual(1, clustering.Clusters[2].Members.Count);
        }
        // A data point constructed with a second argument of 1 must reject a second
        // attribute by throwing NumberOfDimensionsExceededException.
        public void AddAttributeShouldCheckUpperBound()
        {
            var point = new IdentifiableDataPoint(0, 1);
            point.AddAttribute("Age", 27);

            // The first attribute fits; the next one is expected to throw.
            Assert.Throws<NumberOfDimensionsExceededException>(
                () => point.AddAttribute("Status", 1));
        }
        // Checks that the string indexer returns the value that was stored under each
        // attribute name.
        public void IndexerShouldReturnCorrectValue()
        {
            var point = new IdentifiableDataPoint(0, 5);
            point.AddAttribute("Attr1", 42);
            point.AddAttribute("Attr2", 24);
            point.AddAttribute("Attr3", 25);

            // Look every attribute up by name.
            Assert.AreEqual(42, point["Attr1"]);
            Assert.AreEqual(24, point["Attr2"]);
            Assert.AreEqual(25, point["Attr3"]);
        }
        // Adding two attributes under the same name must keep both entries: each position
        // reports the shared name and its own value.
        public void AttributesWithTheSameNameShouldBeAllowed()
        {
            var point = new IdentifiableDataPoint(0, 5);
            point.AddAttribute("Attr1", 41);
            point.AddAttribute("Attr1", 42);

            // Names are checked through Attributes, values through the integer indexer.
            Assert.AreEqual("Attr1", point.Attributes[0]);
            Assert.AreEqual("Attr1", point.Attributes[1]);
            Assert.AreEqual(41, point[0]);
            Assert.AreEqual(42, point[1]);
        }
        // Attribute names must be exposed through Attributes in the order they were added.
        public void ShouldSaveAttributesInOrder()
        {
            var point = new IdentifiableDataPoint(0, 3);
            point.AddAttribute("Attr1", 27);
            point.AddAttribute("Attr2", 23);
            point.AddAttribute("Attr3", 24);

            // Position i must hold the i-th name that was added.
            Assert.AreEqual("Attr1", point.Attributes[0]);
            Assert.AreEqual("Attr2", point.Attributes[1]);
            Assert.AreEqual("Attr3", point.Attributes[2]);
        }
Example #6
0
        // Runs KMeans over four five-attribute points and checks the centroid coordinates
        // reported for each of the three resulting clusters.
        public void KMeansCentroidsArePlacedRightInFiveDimensions()
        {
            var points = new IdentifiableDataPointCollection();

            var first = new IdentifiableDataPoint(0, 5);
            first.AddAttribute("Gender", 1);
            first.AddAttribute("Income", 1);
            first.AddAttribute("Age", 0.16);
            first.AddAttribute("Purchase", 0.5);
            first.AddAttribute("Control", 1);

            var second = new IdentifiableDataPoint(1, 5);
            second.AddAttribute("Gender", 0);
            second.AddAttribute("Income", 0.1429);
            second.AddAttribute("Age", 0.16);
            second.AddAttribute("Purchase", 1);
            second.AddAttribute("Control", 0);

            var third = new IdentifiableDataPoint(2, 5);
            third.AddAttribute("Gender", 1);
            third.AddAttribute("Income", 0.2858);
            third.AddAttribute("Age", 0.16);
            third.AddAttribute("Purchase", 1);
            third.AddAttribute("Control", 1);

            var fourth = new IdentifiableDataPoint(3, 5);
            fourth.AddAttribute("Gender", 1);
            fourth.AddAttribute("Income", 1);
            fourth.AddAttribute("Age", 0.16);
            fourth.AddAttribute("Purchase", 1);
            fourth.AddAttribute("Control", 0.5);

            foreach (var point in new [] { first, second, third, fourth })
            {
                points.AddItem(point);
            }

            // NOTE(review): the int[] argument presumably lists the indices of the points
            // used as initial centroids - confirm against the KMeans constructor.
            var clustering = new KMeans(points, new [] { 0, 1, 2 }, new EuclideanMetric()).Calculate();

            // Component-wise mean of the first and fourth points.
            double[] expectedCentroid0 = { 1, 1, 0.16, 0.75, 0.75 };
            // Identical to the second point's values.
            double[] expectedCentroid1 = { 0, 0.1429, 0.16, 1, 0 };
            // Identical to the third point's values.
            double[] expectedCentroid2 = { 1, 0.2858, 0.16, 1, 1 };

            Assert.AreEqual(expectedCentroid0, clustering.Clusters[0].Centroid.Coordinates);
            Assert.AreEqual(expectedCentroid1, clustering.Clusters[1].Centroid.Coordinates);
            Assert.AreEqual(expectedCentroid2, clustering.Clusters[2].Centroid.Coordinates);
        }
Example #7
0
        /// <summary>
        /// Expands a single-choice field into one attribute per possible value of
        /// <paramref name="field"/>. The value whose name equals the label read from the
        /// CSV gets <c>field.Weight / (number of possible values)</c> and the original
        /// value "Yes"; every other value gets 0 and "No".
        /// </summary>
        /// <param name="field">Field definition supplying Index, Values, Weight and Category.</param>
        /// <param name="csv">Reader from which the field's label is fetched.</param>
        /// <param name="profile">Data point that receives the generated attributes.</param>
        private void ParseMultipleBinaryField(IField field, CsvReader csv, IdentifiableDataPoint profile)
        {
            string rawLabel = csv.GetField<string>(field.Index);
            string selected = RemoveWhiteSpace ? rawLabel.Trim() : rawLabel;

            int optionCount = field.Values.Count();

            foreach (IFieldValue option in field.Values)
            {
                bool isSelected = option.Name.Equals(selected);

                // Only the selected option carries weight; the weight is spread over all options.
                double value         = isSelected ? field.Weight / optionCount : 0;
                string originalValue = isSelected ? "Yes" : "No";
                string name          = String.Format("{0}: {1}", field.Category, option.Name);

                profile.AddAttribute(name, value, originalValue);
            }
        }
Example #8
0
        /// <summary>
        /// Expands a multiple-choice field into one attribute per possible value of
        /// <paramref name="field"/>. The label read from the CSV is split on
        /// <c>ValueDelimiter</c> and each part is trimmed; every possible value whose name
        /// appears among the parts gets <c>field.Weight / (number of possible values)</c>
        /// and the original value "Yes", all others get 0 and "No".
        /// </summary>
        /// <param name="field">Field definition supplying Index, Values, Weight and Category.</param>
        /// <param name="csv">Reader from which the field's label is fetched.</param>
        /// <param name="profile">Data point that receives the generated attributes.</param>
        private void ParseMultipleChoiceBinaryField(IField field, CsvReader csv, IdentifiableDataPoint profile)
        {
            string rawLabel = csv.GetField<string>(field.Index);
            string cell     = RemoveWhiteSpace ? rawLabel.Trim() : rawLabel;

            string[] chosen = cell.Split(ValueDelimiter).Select(part => part.Trim()).ToArray();

            int optionCount = field.Values.Count();

            foreach (IFieldValue option in field.Values)
            {
                bool isChosen = chosen.Contains(option.Name);

                // Each chosen option carries the same share of the field's weight.
                double value         = isChosen ? field.Weight / optionCount : 0;
                string originalValue = isChosen ? "Yes" : "No";
                string name          = String.Format("{0}: {1}", field.Category, option.Name);

                profile.AddAttribute(name, value, originalValue);
            }
        }
Example #9
0
        // Runs KMeans over four one-attribute points (three with value 1, one with value 0)
        // and checks the centroid coordinates of the two resulting clusters.
        public void KMeansShouldCalculateCorrectCentroidsInOneDimension()
        {
            var points = new IdentifiableDataPointCollection();

            var first = new IdentifiableDataPoint(0, 1);
            first.AddAttribute("Gender", 1);

            var second = new IdentifiableDataPoint(1, 1);
            second.AddAttribute("Gender", 0);

            var third = new IdentifiableDataPoint(2, 1);
            third.AddAttribute("Gender", 1);

            var fourth = new IdentifiableDataPoint(3, 1);
            fourth.AddAttribute("Gender", 1);

            foreach (var point in new [] { first, second, third, fourth })
            {
                points.AddItem(point);
            }

            // NOTE(review): the int[] argument presumably lists the indices of the points
            // used as initial centroids - confirm against the KMeans constructor.
            var clustering = new KMeans(points, new [] { 0, 1 }, new EuclideanMetric()).Calculate();

            double[] expectedCentroid0 = { 1 };
            double[] expectedCentroid1 = { 0 };

            Assert.AreEqual(expectedCentroid0, clustering.Clusters[0].Centroid.Coordinates);
            Assert.AreEqual(expectedCentroid1, clustering.Clusters[1].Centroid.Coordinates);
        }
Example #10
0
        // Runs KMeans over four one-attribute points and checks cluster membership: the
        // three points with value 1 belong to cluster 0, the point with value 0 to cluster 1.
        public void KMeansShouldClusterDataPointsInOneDimensionCorrectly()
        {
            var points = new IdentifiableDataPointCollection();

            var first = new IdentifiableDataPoint(0, 1);
            first.AddAttribute("Gender", 1);

            var second = new IdentifiableDataPoint(1, 1);
            second.AddAttribute("Gender", 0);

            var third = new IdentifiableDataPoint(2, 1);
            third.AddAttribute("Gender", 1);

            var fourth = new IdentifiableDataPoint(3, 1);
            fourth.AddAttribute("Gender", 1);

            foreach (var point in new [] { first, second, third, fourth })
            {
                points.AddItem(point);
            }

            // NOTE(review): the int[] argument presumably lists the indices of the points
            // used as initial centroids - confirm against the KMeans constructor.
            var clustering = new KMeans(points, new [] { 0, 1 }, new EuclideanMetric()).Calculate();

            // Unwrap the data points held by each cluster's member entries.
            var onesCluster  = clustering.Clusters[0].Members.Select(entry => entry.Member).ToArray();
            var zerosCluster = clustering.Clusters[1].Members.Select(entry => entry.Member).ToArray();

            Assert.AreEqual(3, onesCluster.Length);
            Assert.Contains(first, onesCluster);
            Assert.Contains(third, onesCluster);
            Assert.Contains(fourth, onesCluster);

            Assert.AreEqual(1, zerosCluster.Length);
            Assert.Contains(second, zerosCluster);
        }
Example #11
0
        /// <summary>
        /// Reads the field's label from the CSV, translates it to a number through
        /// <c>field.Values.GetDoubleValueFor</c>, multiplies the result by
        /// <c>field.Weight</c> and stores it on <paramref name="profile"/> under
        /// <c>field.Category</c>, keeping the label as the original value.
        /// </summary>
        /// <param name="field">Field definition supplying Index, Values, Weight and Category.</param>
        /// <param name="csv">Reader from which the field's label is fetched.</param>
        /// <param name="profile">Data point that receives the attribute.</param>
        /// <exception cref="InvalidFieldValueException">
        /// Thrown when the label has no numeric translation.
        /// </exception>
        private void ParseScalarField(IField field, CsvReader csv, IdentifiableDataPoint profile)
        {
            string rawLabel    = csv.GetField<string>(field.Index);
            double? translated = field.Values.GetDoubleValueFor(rawLabel);

            if (translated.HasValue)
            {
                double weighted = translated.Value * field.Weight;

                profile.AddAttribute(field.Category, weighted, rawLabel);
                return;
            }

            // No translation exists for this label: report the offending row and column.
            throw new InvalidFieldValueException(csv.Row, field.Index);
        }
Example #12
0
        // Constructing KMeans with { 0, 1 } over a collection whose first two points carry
        // identical attribute values is expected to throw InvalidOperationException.
        public void CentroidsHaveDistinctValues()
        {
            var points = new IdentifiableDataPointCollection();

            var first = new IdentifiableDataPoint(0, 2);
            first.AddAttribute("Gender", 1);
            first.AddAttribute("Income", 1);

            // Deliberate duplicate of the first point's values.
            var duplicate = new IdentifiableDataPoint(1, 2);
            duplicate.AddAttribute("Gender", 1);
            duplicate.AddAttribute("Income", 1);

            var other = new IdentifiableDataPoint(2, 2);
            other.AddAttribute("Gender", 0);
            other.AddAttribute("Income", 0);

            foreach (var point in new [] { first, duplicate, other })
            {
                points.AddItem(point);
            }

            // NOTE(review): { 0, 1 } presumably selects the two identical points as initial
            // centroids, which is what should be rejected - confirm against KMeans.
            Assert.Throws<InvalidOperationException>(
                () => new KMeans(points, new [] { 0, 1 }, new EuclideanMetric()));
        }
Example #13
0
        /// <summary>
        /// Reads a numeric field from the CSV, min-max normalizes it against
        /// <c>field.MinValue</c> / <c>field.MaxValue</c>, multiplies the result by
        /// <c>field.Weight</c> and stores it on <paramref name="dataItem"/> under
        /// <c>field.Category</c>.
        /// </summary>
        /// <param name="field">Field definition supplying Index, MinValue, MaxValue, Weight and Category.</param>
        /// <param name="csv">Reader from which the raw text of the field is fetched.</param>
        /// <param name="dataItem">Data point that receives the attribute.</param>
        /// <exception cref="InvalidNumericValueException">
        /// Thrown when the raw text cannot be parsed as a double using <c>parseCulture</c>.
        /// </exception>
        private void ParseNumericField(IField field, CsvReader csv, IdentifiableDataPoint dataItem)
        {
            double valueInDataField;
            string val = csv.GetField(field.Index);

            if (!double.TryParse(val, NumberStyles.Any, parseCulture, out valueInDataField))
            {
                throw new InvalidNumericValueException(csv.Row, field.Index);
            }

            // NOTE(review): the stored original value is the parsed number re-formatted with
            // the current culture, not the raw CSV text and not parseCulture - confirm intended.
            string originalValue = valueInDataField.ToString();
            double difference    = field.MaxValue - field.MinValue;

            // A degenerate range (MinValue == MaxValue) would divide by zero and store NaN or
            // +/-Infinity as the attribute value. Such a field carries no information, so it
            // is normalized to 0 instead.
            double normalizedValue = difference == 0
                ? 0
                : (valueInDataField - field.MinValue) / difference;
            double finalValue = normalizedValue * field.Weight;

            dataItem.AddAttribute(field.Category, finalValue, originalValue);
        }
Example #14
0
        // Constructing KMeans with 0 as its second argument over a populated collection is
        // expected to throw ArgumentException.
        public void CalculateWithZeroClusters()
        {
            var points = new IdentifiableDataPointCollection();

            var first = new IdentifiableDataPoint(0, 1);
            first.AddAttribute("Gender", 1);

            var second = new IdentifiableDataPoint(1, 1);
            second.AddAttribute("Gender", 0);

            var third = new IdentifiableDataPoint(2, 1);
            third.AddAttribute("Gender", 1);

            var fourth = new IdentifiableDataPoint(3, 1);
            fourth.AddAttribute("Gender", 1);

            foreach (var point in new [] { first, second, third, fourth })
            {
                points.AddItem(point);
            }

            // NOTE(review): the integer argument is presumably the requested cluster count -
            // confirm against the KMeans constructor.
            Assert.Throws<ArgumentException>(
                () => new KMeans(points, 0, new EuclideanMetric()));
        }