Beispiel #1
0
        /// <summary>
        /// Expands a multiple-choice CSV cell into one binary attribute per configured field value.
        /// </summary>
        /// <remarks>
        /// The cell is split on <c>ValueDelimiter</c> and every piece is trimmed. A configured value
        /// whose name appears among the pieces is stored as <c>field.Weight / count</c> with the
        /// original value "Yes"; every other configured value is stored as 0 with "No".
        /// </remarks>
        /// <param name="field">Field definition supplying the column index, category, weight and possible values.</param>
        /// <param name="csv">Reader the cell is fetched from via <c>field.Index</c>.</param>
        /// <param name="profile">Data point that receives one attribute per possible value.</param>
        private void ParseMultipleChoiceBinaryField(IField field, CsvReader csv, IdentifiableDataPoint profile)
        {
            string cell = csv.GetField<string>(field.Index);
            if (RemoveWhiteSpace)
            {
                cell = cell.Trim();
            }

            string[] chosenNames = cell.Split(ValueDelimiter)
                                       .Select(piece => piece.Trim())
                                       .ToArray();
            int optionCount = field.Values.Count();

            foreach (IFieldValue option in field.Values)
            {
                bool isChosen = chosenNames.Contains(option.Name);

                // The weight is shared between all possible values of the field.
                double score = 0;
                if (isChosen)
                {
                    score = field.Weight / optionCount;
                }

                profile.AddAttribute(
                    string.Format("{0}: {1}", field.Category, option.Name),
                    score,
                    isChosen ? "Yes" : "No");
            }
        }
Beispiel #2
0
 /// <summary>
 /// Creates a drawable point that remembers the data point it was derived from
 /// together with its X and Y values.
 /// </summary>
 /// <param name="orgin">Data point stored in <c>Origin</c>.</param>
 /// <param name="x">Value stored in <c>X</c>.</param>
 /// <param name="y">Value stored in <c>Y</c>.</param>
 public DrawableDataPoint(IdentifiableDataPoint orgin, double x, double y)
     : base(2) // presumably the dimension count of the base point (x and y) - confirm against the base class
 {
     this.Origin = orgin;
     this.X      = x;
     this.Y      = y;
 }
Beispiel #3
0
        /// <summary>
        /// Runs k-means over four two-attribute points with the index array { 0, 1, 2 }
        /// and expects cluster sizes of 2, 1 and 1.
        /// </summary>
        public void KMeansClusteringWorksOnTwoDimensions()
        {
            var first = new IdentifiableDataPoint(0, 2);
            first.AddAttribute("Gender", 1);
            first.AddAttribute("Income", 1);

            var second = new IdentifiableDataPoint(1, 2);
            second.AddAttribute("Gender", 0);
            second.AddAttribute("Income", 0.1429);

            var third = new IdentifiableDataPoint(2, 2);
            third.AddAttribute("Gender", 1);
            third.AddAttribute("Income", 0.2858);

            var fourth = new IdentifiableDataPoint(3, 2);
            fourth.AddAttribute("Gender", 1);
            fourth.AddAttribute("Income", 1);

            var points = new IdentifiableDataPointCollection();
            points.AddItem(first);
            points.AddItem(second);
            points.AddItem(third);
            points.AddItem(fourth);

            // NOTE(review): the int array looks like indices of the initial centroids - confirm against KMeans.
            var algorithm = new KMeans(points, new [] { 0, 1, 2 }, new EuclideanMetric());
            var outcome   = algorithm.Calculate();

            Assert.AreEqual(2, outcome.Clusters[0].Members.Count);
            Assert.AreEqual(1, outcome.Clusters[1].Members.Count);
            Assert.AreEqual(1, outcome.Clusters[2].Members.Count);
        }
Beispiel #4
0
        /// <summary>
        /// Runs k-means over four single-attribute points (values 1, 0, 1, 1) with the index
        /// array { 0, 1 } and expects the centroid coordinates { 1 } and { 0 }.
        /// </summary>
        public void KMeansShouldCalculateCorrectCentroidsInOneDimension()
        {
            var first = new IdentifiableDataPoint(0, 1);
            first.AddAttribute("Gender", 1);

            var second = new IdentifiableDataPoint(1, 1);
            second.AddAttribute("Gender", 0);

            var third = new IdentifiableDataPoint(2, 1);
            third.AddAttribute("Gender", 1);

            var fourth = new IdentifiableDataPoint(3, 1);
            fourth.AddAttribute("Gender", 1);

            var points = new IdentifiableDataPointCollection();
            points.AddItem(first);
            points.AddItem(second);
            points.AddItem(third);
            points.AddItem(fourth);

            var outcome = new KMeans(points, new [] { 0, 1 }, new EuclideanMetric()).Calculate();

            double[] expectedFirstCentroid  = { 1 };
            double[] expectedSecondCentroid = { 0 };

            Assert.AreEqual(expectedFirstCentroid, outcome.Clusters[0].Centroid.Coordinates);
            Assert.AreEqual(expectedSecondCentroid, outcome.Clusters[1].Centroid.Coordinates);
        }
Beispiel #5
0
        /// <summary>
        /// Expands a single-choice CSV cell into one binary attribute per configured field value.
        /// </summary>
        /// <remarks>
        /// The configured value whose name equals the cell text is stored as
        /// <c>field.Weight / count</c> with the original value "Yes"; every other configured
        /// value is stored as 0 with "No".
        /// </remarks>
        /// <param name="field">Field definition supplying the column index, category, weight and possible values.</param>
        /// <param name="csv">Reader the cell is fetched from via <c>field.Index</c>.</param>
        /// <param name="profile">Data point that receives one attribute per possible value.</param>
        private void ParseMultipleBinaryField(IField field, CsvReader csv, IdentifiableDataPoint profile)
        {
            string cell = csv.GetField<string>(field.Index);
            if (RemoveWhiteSpace)
            {
                cell = cell.Trim();
            }

            int optionCount = field.Values.Count();

            foreach (IFieldValue option in field.Values)
            {
                bool isMatch = option.Name.Equals(cell);

                // The weight is shared between all possible values of the field.
                double score = 0;
                if (isMatch)
                {
                    score = field.Weight / optionCount;
                }

                profile.AddAttribute(
                    string.Format("{0}: {1}", field.Category, option.Name),
                    score,
                    isMatch ? "Yes" : "No");
            }
        }
Beispiel #6
0
        /// <summary>
        /// Runs k-means over four single-attribute points (values 1, 0, 1, 1) with the index
        /// array { 0, 1 } and expects the three points with value 1 in the first cluster and
        /// the point with value 0 alone in the second.
        /// </summary>
        public void KMeansShouldClusterDataPointsInOneDimensionCorrectly()
        {
            var first = new IdentifiableDataPoint(0, 1);
            first.AddAttribute("Gender", 1);

            var second = new IdentifiableDataPoint(1, 1);
            second.AddAttribute("Gender", 0);

            var third = new IdentifiableDataPoint(2, 1);
            third.AddAttribute("Gender", 1);

            var fourth = new IdentifiableDataPoint(3, 1);
            fourth.AddAttribute("Gender", 1);

            var points = new IdentifiableDataPointCollection();
            points.AddItem(first);
            points.AddItem(second);
            points.AddItem(third);
            points.AddItem(fourth);

            var outcome = new KMeans(points, new [] { 0, 1 }, new EuclideanMetric()).Calculate();

            var firstCluster  = outcome.Clusters[0].Members.Select(entry => entry.Member).ToArray();
            var secondCluster = outcome.Clusters[1].Members.Select(entry => entry.Member).ToArray();

            Assert.AreEqual(3, firstCluster.Length);
            Assert.Contains(first, firstCluster);
            Assert.Contains(third, firstCluster);
            Assert.Contains(fourth, firstCluster);

            Assert.AreEqual(1, secondCluster.Length);
            Assert.Contains(second, secondCluster);
        }
        /// <summary>
        /// Fills the details grid with one row per attribute of <paramref name="dataPoint"/>,
        /// comparing each coordinate with the matching coordinate of <paramref name="centroid"/>.
        /// </summary>
        /// <remarks>
        /// Rows are ordered by descending <c>Percent</c>, ties by ascending <c>Value</c>; rows whose
        /// <c>Percent</c> equals 200 are moved to the end.
        /// NOTE(review): 200 looks like a sentinel produced by PercentageCalculator - confirm.
        /// </remarks>
        /// <param name="dataPoint">Point whose id, attribute names, original values and coordinates are shown.</param>
        /// <param name="centroid">Centroid whose coordinates are read at the same indices as the point's attributes.</param>
        public void GenerateDetails(IdentifiableDataPoint dataPoint, DataPoint centroid)
        {
            dataGridView1.Visible = true;
            dataPointInfoList     = new List<DataPointInfo>();

            lblID.Text = "ID: " + dataPoint.Id.ToString();

            dataGridView1.AutoGenerateColumns = false;

            int attributeCount = dataPoint.Attributes.Count;
            for (int i = 0; i < attributeCount; i++)
            {
                dataPointInfo            = new DataPointInfo();
                dataPointInfo.Field      = dataPoint.Attributes[i];
                dataPointInfo.Value      = dataPoint.OriginalValues[i];
                dataPointInfo.Percent    = PercentageCalculator(dataPoint.Coordinates[i], centroid.Coordinates[i]);
                dataPointInfo.Similarity = SimilarityCalculator(dataPointInfo.Percent);
                dataPointInfoList.Add(dataPointInfo);
            }

            dataPointInfoList.Sort((left, right) =>
            {
                bool leftIsSpecial  = left.Percent == 200;
                bool rightIsSpecial = right.Percent == 200;

                // Rows with Percent == 200 always go after every other row.
                if (leftIsSpecial && !rightIsSpecial)
                {
                    return 1;
                }
                if (!leftIsSpecial && rightIsSpecial)
                {
                    return -1;
                }

                // Higher percentages first.
                if (left.Percent < right.Percent)
                {
                    return 1;
                }
                if (left.Percent > right.Percent)
                {
                    return -1;
                }

                if (left.Percent == right.Percent)
                {
                    // Equal percentages: fall back to the original value text.
                    int byValue = string.Compare(left.Value, right.Value);
                    return byValue > 0 ? 1 : (byValue < 0 ? -1 : 0);
                }

                return 0;
            });

            dataGridView1.DataSource = dataPointInfoList;
        }
Beispiel #8
0
        /// <summary>
        /// Builds one data point from the reader's current row by dispatching every configured
        /// field to the parser that matches its <c>FieldType</c>.
        /// </summary>
        /// <param name="csv">Reader handed on to the individual field parsers.</param>
        /// <param name="id">Identifier given to the created data point.</param>
        /// <returns>The data point populated by the field parsers.</returns>
        /// <exception cref="InvalidOperationException">A field has a type none of the parsers handles.</exception>
        private IdentifiableDataPoint ParseRow(CsvReader csv, int id)
        {
            var parsedPoint = new IdentifiableDataPoint(id, config.DimensionCount);

            foreach (IField field in config.Fields)
            {
                var kind = field.Type;

                if (kind == FieldType.Scalar)
                {
                    ParseScalarField(field, csv, parsedPoint);
                }
                else if (kind == FieldType.MultipleBinaryFields)
                {
                    ParseMultipleBinaryField(field, csv, parsedPoint);
                }
                else if (kind == FieldType.MultipleChoiceMultipleBinaryFields)
                {
                    ParseMultipleChoiceBinaryField(field, csv, parsedPoint);
                }
                else if (kind == FieldType.Numeric)
                {
                    ParseNumericField(field, csv, parsedPoint);
                }
                else
                {
                    throw new InvalidOperationException("Unknown field type.");
                }
            }

            return parsedPoint;
        }
        /// <summary>
        /// A point created with the second constructor argument 1 accepts one attribute;
        /// adding a second one must throw <c>NumberOfDimensionsExceededException</c>.
        /// </summary>
        public void AddAttributeShouldCheckUpperBound()
        {
            var point = new IdentifiableDataPoint(0, 1);
            point.AddAttribute("Age", 27);

            Assert.Throws<NumberOfDimensionsExceededException>(
                () => point.AddAttribute("Status", 1));
        }
        /// <summary>
        /// Reading an attribute by a name that was never added must throw an
        /// <c>ArgumentException</c> for parameter "attributeName" whose message mentions the name.
        /// </summary>
        public void AccessingNonExistingAttributeNameShouldThrowException()
        {
            var point = new IdentifiableDataPoint(0, 5);

            var thrown = Assert.Throws<ArgumentException>(
                () => point["Attr1"].ToString());

            Assert.AreEqual("attributeName", thrown.ParamName);
            Assert.IsTrue(thrown.Message.Contains("'Attr1' does not exists"));
        }
        /// <summary>
        /// Two attributes added under the same name are both kept, each at its own index
        /// with its own value.
        /// </summary>
        public void AttributesWithTheSameNameShouldBeAllowed()
        {
            var point = new IdentifiableDataPoint(0, 5);
            point.AddAttribute("Attr1", 41);
            point.AddAttribute("Attr1", 42);

            // Names are stored per position.
            Assert.AreEqual("Attr1", point.Attributes[0]);
            Assert.AreEqual("Attr1", point.Attributes[1]);

            // Values are stored per position as well.
            Assert.AreEqual(41, point[0]);
            Assert.AreEqual(42, point[1]);
        }
        /// <summary>
        /// The name-based indexer returns the value that was added under that name.
        /// </summary>
        public void IndexerShouldReturnCorrectValue()
        {
            var point = new IdentifiableDataPoint(0, 5);
            point.AddAttribute("Attr1", 42);
            point.AddAttribute("Attr2", 24);
            point.AddAttribute("Attr3", 25);

            Assert.AreEqual(42, point["Attr1"]);
            Assert.AreEqual(24, point["Attr2"]);
            Assert.AreEqual(25, point["Attr3"]);
        }
Beispiel #13
0
 /// <summary>
 /// Shows the details of the clicked point's origin together with the centroid of the
 /// cluster that contains it. Does nothing when there is no cluster result or no
 /// cluster is found for the point.
 /// </summary>
 /// <param name="sender">Not used.</param>
 /// <param name="e">Clicked drawable point; its <c>Origin</c> is looked up in the cluster result.</param>
 private void DataPointClicked(object sender, DrawableDataPoint e)
 {
     if (clusterResult == null)
     {
         return;
     }

     IdentifiableDataPoint clicked = e.Origin;
     Cluster owner                 = clusterResult.FindCluster(clicked);
     if (owner == null)
     {
         return;
     }

     dataPointDetailsComponent1.GenerateDetails(clicked, owner.Centroid);
 }
        /// <summary>
        /// Attribute names are exposed in the order in which they were added.
        /// </summary>
        public void ShouldSaveAttributesInOrder()
        {
            var point = new IdentifiableDataPoint(0, 3);
            point.AddAttribute("Attr1", 27);
            point.AddAttribute("Attr2", 23);
            point.AddAttribute("Attr3", 24);

            Assert.AreEqual("Attr1", point.Attributes[0]);
            Assert.AreEqual("Attr2", point.Attributes[1]);
            Assert.AreEqual("Attr3", point.Attributes[2]);
        }
Beispiel #15
0
        /// <summary>
        /// Translates a CSV cell into its configured numeric value, multiplies it by the field
        /// weight and stores the result on the data point under the field's category.
        /// </summary>
        /// <param name="field">Field definition supplying the column index, value mapping, weight and category.</param>
        /// <param name="csv">Reader the cell is fetched from via <c>field.Index</c>.</param>
        /// <param name="profile">Data point that receives the attribute; the cell text is kept as original value.</param>
        /// <exception cref="InvalidFieldValueException">The field's values provide no number for the cell text.</exception>
        private void ParseScalarField(IField field, CsvReader csv, IdentifiableDataPoint profile)
        {
            string cell    = csv.GetField<string>(field.Index);
            double? mapped = field.Values.GetDoubleValueFor(cell);

            if (mapped.HasValue)
            {
                double weighted = mapped.Value * field.Weight;
                profile.AddAttribute(field.Category, weighted, cell);
                return;
            }

            throw new InvalidFieldValueException(csv.Row, field.Index);
        }
Beispiel #16
0
        /// <summary>
        /// Runs k-means over four five-attribute points with the index array { 0, 1, 2 } and
        /// checks the centroid coordinates of all three resulting clusters.
        /// </summary>
        public void KMeansCentroidsArePlacedRightInFiveDimensions()
        {
            var first = new IdentifiableDataPoint(0, 5);
            first.AddAttribute("Gender", 1);
            first.AddAttribute("Income", 1);
            first.AddAttribute("Age", 0.16);
            first.AddAttribute("Purchase", 0.5);
            first.AddAttribute("Control", 1);

            var second = new IdentifiableDataPoint(1, 5);
            second.AddAttribute("Gender", 0);
            second.AddAttribute("Income", 0.1429);
            second.AddAttribute("Age", 0.16);
            second.AddAttribute("Purchase", 1);
            second.AddAttribute("Control", 0);

            var third = new IdentifiableDataPoint(2, 5);
            third.AddAttribute("Gender", 1);
            third.AddAttribute("Income", 0.2858);
            third.AddAttribute("Age", 0.16);
            third.AddAttribute("Purchase", 1);
            third.AddAttribute("Control", 1);

            var fourth = new IdentifiableDataPoint(3, 5);
            fourth.AddAttribute("Gender", 1);
            fourth.AddAttribute("Income", 1);
            fourth.AddAttribute("Age", 0.16);
            fourth.AddAttribute("Purchase", 1);
            fourth.AddAttribute("Control", 0.5);

            var points = new IdentifiableDataPointCollection();
            points.AddItem(first);
            points.AddItem(second);
            points.AddItem(third);
            points.AddItem(fourth);

            var outcome = new KMeans(points, new [] { 0, 1, 2 }, new EuclideanMetric()).Calculate();

            // Expected coordinates, listed in the order of the clusters they are compared with.
            double[] expectedCluster0 = { 1, 1, 0.16, 0.75, 0.75 };
            double[] expectedCluster1 = { 0, 0.1429, 0.16, 1, 0 };
            double[] expectedCluster2 = { 1, 0.2858, 0.16, 1, 1 };

            Assert.AreEqual(expectedCluster0, outcome.Clusters[0].Centroid.Coordinates);
            Assert.AreEqual(expectedCluster1, outcome.Clusters[1].Centroid.Coordinates);
            Assert.AreEqual(expectedCluster2, outcome.Clusters[2].Centroid.Coordinates);
        }
Beispiel #17
0
 /// <summary>
 /// Highlights the first chart point whose tagged <c>DrawableDataPoint</c> originates from
 /// <paramref name="inputPoint"/>. Does nothing when no such chart point exists.
 /// </summary>
 /// <param name="inputPoint">Data point to look for; compared with <c>==</c> against each tag's <c>Origin</c>.</param>
 public void HighlightPoint(IdentifiableDataPoint inputPoint)
 {
     foreach (var series in chart1.Series)
     {
         foreach (var point in series.Points)
         {
             // A chart point may carry no tag, or a tag of another type. A hard cast would
             // throw in both cases, so such points are simply skipped.
             var drawable = point.Tag as DrawableDataPoint;
             if (drawable != null && inputPoint == drawable.Origin)
             {
                 HighlightPoint(point);
                 return;
             }
         }
     }
 }
Beispiel #18
0
        /// <summary>
        /// Parses a numeric CSV cell, min-max normalizes it against the field's
        /// <c>MinValue</c>/<c>MaxValue</c>, multiplies it by the field weight and stores the
        /// result on the data point under the field's category.
        /// </summary>
        /// <remarks>
        /// When <c>MaxValue</c> equals <c>MinValue</c> the normalized value is taken as 0 instead
        /// of dividing by zero, so that no NaN or infinite coordinate is stored.
        /// </remarks>
        /// <param name="field">Field definition supplying the column index, range, weight and category.</param>
        /// <param name="csv">Reader the cell is fetched from via <c>field.Index</c>.</param>
        /// <param name="dataItem">Data point that receives the attribute.</param>
        /// <exception cref="InvalidNumericValueException">The cell cannot be parsed as a number with <c>parseCulture</c>.</exception>
        private void ParseNumericField(IField field, CsvReader csv, IdentifiableDataPoint dataItem)
        {
            string rawText = csv.GetField(field.Index);

            double parsed;
            if (!double.TryParse(rawText, NumberStyles.Any, parseCulture, out parsed))
            {
                throw new InvalidNumericValueException(csv.Row, field.Index);
            }

            // Formatted without a format provider, i.e. with the current culture.
            string originalValue = parsed.ToString();

            double range = field.MaxValue - field.MinValue;

            // A degenerate range would make the division yield NaN or +/-Infinity.
            double normalizedValue = range == 0
                ? 0
                : (parsed - field.MinValue) / range;

            double finalValue = normalizedValue * field.Weight;

            dataItem.AddAttribute(field.Category, finalValue, originalValue);
        }
Beispiel #19
0
        /// <summary>
        /// Reads every row from <c>reader</c> using <c>FieldDelimiter</c> and converts each row
        /// into a data point.
        /// </summary>
        /// <returns>
        /// Collection holding one data point per row read; ids are assigned in reading order
        /// starting at 0.
        /// </returns>
        public IdentifiableDataPointCollection Run()
        {
            var csv = new CsvReader(reader);
            csv.Configuration.Delimiter = FieldDelimiter;

            var parsedPoints = new IdentifiableDataPointCollection();

            for (int nextId = 0; csv.Read(); nextId++)
            {
                parsedPoints.AddItem(ParseRow(csv, nextId));
            }

            return parsedPoints;
        }
Beispiel #20
0
        /// <summary>
        /// Constructing <c>KMeans</c> with 0 as second argument must throw an
        /// <c>ArgumentException</c>.
        /// </summary>
        public void CalculateWithZeroClusters()
        {
            var first = new IdentifiableDataPoint(0, 1);
            first.AddAttribute("Gender", 1);

            var second = new IdentifiableDataPoint(1, 1);
            second.AddAttribute("Gender", 0);

            var third = new IdentifiableDataPoint(2, 1);
            third.AddAttribute("Gender", 1);

            var fourth = new IdentifiableDataPoint(3, 1);
            fourth.AddAttribute("Gender", 1);

            var points = new IdentifiableDataPointCollection();
            points.AddItem(first);
            points.AddItem(second);
            points.AddItem(third);
            points.AddItem(fourth);

            Assert.Throws<ArgumentException>(
                () => new KMeans(points, 0, new EuclideanMetric()));
        }
Beispiel #21
0
        /// <summary>
        /// Constructing <c>KMeans</c> with the index array { 0, 1 } must throw an
        /// <c>InvalidOperationException</c> when the points at those indices have identical values.
        /// </summary>
        public void CentroidsHaveDistinctValues()
        {
            var first = new IdentifiableDataPoint(0, 2);
            first.AddAttribute("Gender", 1);
            first.AddAttribute("Income", 1);

            // Same values as the first point.
            var second = new IdentifiableDataPoint(1, 2);
            second.AddAttribute("Gender", 1);
            second.AddAttribute("Income", 1);

            var third = new IdentifiableDataPoint(2, 2);
            third.AddAttribute("Gender", 0);
            third.AddAttribute("Income", 0);

            var points = new IdentifiableDataPointCollection();
            points.AddItem(first);
            points.AddItem(second);
            points.AddItem(third);

            Assert.Throws<InvalidOperationException>(
                () => new KMeans(points, new [] { 0, 1 }, new EuclideanMetric()));
        }
Beispiel #22
0
 /// <summary>
 /// Reacts to a click on a point in the outlier detection component: shows the point's
 /// details relative to its cluster centroid and highlights it in the scatter plot.
 /// </summary>
 /// <remarks>
 /// The details are only generated when a cluster result exists and contains a cluster for
 /// the point, using the same guards as <c>DataPointClicked</c>. The point is highlighted
 /// either way.
 /// </remarks>
 /// <param name="sender">Not used.</param>
 /// <param name="e">Data point that was clicked.</param>
 private void outlierDetectionComponent1_DataPointClick(object sender, IdentifiableDataPoint e)
 {
     if (clusterResult != null)
     {
         var cluster = clusterResult.FindCluster(e);
         if (cluster != null)
         {
             dataPointDetailsComponent1.GenerateDetails(e, cluster.Centroid);
         }
     }

     scatterPlotControl1.HighlightPoint(e);
 }
        /// <summary>
        /// The first constructor argument is exposed through <c>Id</c>.
        /// </summary>
        public void ShouldSaveIdAttribute()
        {
            var point = new IdentifiableDataPoint(42, 1);

            Assert.AreEqual(42, point.Id);
        }
Beispiel #24
0
        /// <summary>
        /// Highlights the given drawable point by forwarding its <c>Origin</c> to the
        /// overload that takes an <c>IdentifiableDataPoint</c>.
        /// </summary>
        /// <param name="inputPoint">Drawable point whose origin is highlighted.</param>
        public void HighlightPoint(DrawableDataPoint inputPoint)
        {
            // The typed local keeps overload resolution on the IdentifiableDataPoint overload.
            IdentifiableDataPoint origin = inputPoint.Origin;
            HighlightPoint(origin);
        }