/// <summary>
/// Expands a multiple-choice CSV cell into one attribute per possible value of the field.
/// </summary>
/// <param name="field">Field definition supplying the column index, category, weight and possible values.</param>
/// <param name="csv">Reader whose current record is being parsed.</param>
/// <param name="profile">Data point that receives one attribute for every possible value.</param>
private void ParseMultipleChoiceBinaryField(IField field, CsvReader csv, IdentifiableDataPoint profile)
{
    string rawCell = csv.GetField<string>(field.Index);
    if (RemoveWhiteSpace)
    {
        rawCell = rawCell.Trim();
    }

    // Individual choices are always trimmed, independent of RemoveWhiteSpace.
    string[] selectedChoices = rawCell.Split(ValueDelimiter).Select(choice => choice.Trim()).ToArray();
    int possibleValueCount = field.Values.Count();

    foreach (IFieldValue candidate in field.Values)
    {
        bool isSelected = selectedChoices.Contains(candidate.Name);

        // A selected choice gets the field weight divided by the number of possible values; others get zero.
        double value = 0;
        if (isSelected)
        {
            value = field.Weight / possibleValueCount;
        }
        string originalValue = isSelected ? "Yes" : "No";

        string name = String.Format("{0}: {1}", field.Category, candidate.Name);
        profile.AddAttribute(name, value, originalValue);
    }
}
/// <summary>
/// Creates a drawable point that remembers the data point it was derived from.
/// Passes 2 to the base constructor (presumably the dimension count — confirm against the base class).
/// </summary>
/// <param name="orgin">Data point this drawable point stands for; stored in <c>Origin</c>.</param>
/// <param name="x">Value assigned to <c>X</c>.</param>
/// <param name="y">Value assigned to <c>Y</c>.</param>
public DrawableDataPoint(IdentifiableDataPoint orgin, double x, double y)
    : base(2)
{
    this.Origin = orgin;
    this.X = x;
    this.Y = y;
}
// Clusters four two-attribute points with the index array { 0, 1, 2 } and expects
// cluster sizes of 2, 1 and 1 (in that cluster order).
public void KMeansClusteringWorksOnTwoDimensions()
{
    var points = new IdentifiableDataPointCollection();

    var first = new IdentifiableDataPoint(0, 2);
    var second = new IdentifiableDataPoint(1, 2);
    var third = new IdentifiableDataPoint(2, 2);
    var fourth = new IdentifiableDataPoint(3, 2);

    first.AddAttribute("Gender", 1);
    first.AddAttribute("Income", 1);

    second.AddAttribute("Gender", 0);
    second.AddAttribute("Income", 0.1429);

    third.AddAttribute("Gender", 1);
    third.AddAttribute("Income", 0.2858);

    // Same coordinates as the first point.
    fourth.AddAttribute("Gender", 1);
    fourth.AddAttribute("Income", 1);

    points.AddItem(first);
    points.AddItem(second);
    points.AddItem(third);
    points.AddItem(fourth);

    var algorithm = new KMeans(points, new [] { 0, 1, 2 }, new EuclideanMetric());
    var outcome = algorithm.Calculate();

    Assert.AreEqual(2, outcome.Clusters[0].Members.Count);
    Assert.AreEqual(1, outcome.Clusters[1].Members.Count);
    Assert.AreEqual(1, outcome.Clusters[2].Members.Count);
}
// Clusters four one-attribute points (values 1, 0, 1, 1) with the index array { 0, 1 }
// and expects the centroid coordinates { 1 } for cluster 0 and { 0 } for cluster 1.
public void KMeansShouldCalculateCorrectCentroidsInOneDimension()
{
    var points = new IdentifiableDataPointCollection();

    var first = new IdentifiableDataPoint(0, 1);
    var second = new IdentifiableDataPoint(1, 1);
    var third = new IdentifiableDataPoint(2, 1);
    var fourth = new IdentifiableDataPoint(3, 1);

    first.AddAttribute("Gender", 1);
    second.AddAttribute("Gender", 0);
    third.AddAttribute("Gender", 1);
    fourth.AddAttribute("Gender", 1);

    points.AddItem(first);
    points.AddItem(second);
    points.AddItem(third);
    points.AddItem(fourth);

    var algorithm = new KMeans(points, new [] { 0, 1 }, new EuclideanMetric());
    var outcome = algorithm.Calculate();

    double[] expectedFirstCentroid = { 1 };
    double[] expectedSecondCentroid = { 0 };

    Assert.AreEqual(expectedFirstCentroid, outcome.Clusters[0].Centroid.Coordinates);
    Assert.AreEqual(expectedSecondCentroid, outcome.Clusters[1].Centroid.Coordinates);
}
/// <summary>
/// Expands a single-choice CSV cell into one attribute per possible value of the field.
/// </summary>
/// <param name="field">Field definition supplying the column index, category, weight and possible values.</param>
/// <param name="csv">Reader whose current record is being parsed.</param>
/// <param name="profile">Data point that receives one attribute for every possible value.</param>
private void ParseMultipleBinaryField(IField field, CsvReader csv, IdentifiableDataPoint profile)
{
    string cellText = csv.GetField<string>(field.Index);
    if (RemoveWhiteSpace)
    {
        cellText = cellText.Trim();
    }

    int possibleValueCount = field.Values.Count();

    foreach (IFieldValue candidate in field.Values)
    {
        bool isMatch = candidate.Name.Equals(cellText);

        // The matching value gets the field weight divided by the number of possible values; others get zero.
        double value = 0;
        if (isMatch)
        {
            value = field.Weight / possibleValueCount;
        }
        string originalValue = isMatch ? "Yes" : "No";

        string name = String.Format("{0}: {1}", field.Category, candidate.Name);
        profile.AddAttribute(name, value, originalValue);
    }
}
// Clusters four one-attribute points (values 1, 0, 1, 1) with the index array { 0, 1 }
// and expects the three points valued 1 in cluster 0 and the point valued 0 in cluster 1.
public void KMeansShouldClusterDataPointsInOneDimensionCorrectly()
{
    var points = new IdentifiableDataPointCollection();

    var first = new IdentifiableDataPoint(0, 1);
    var second = new IdentifiableDataPoint(1, 1);
    var third = new IdentifiableDataPoint(2, 1);
    var fourth = new IdentifiableDataPoint(3, 1);

    first.AddAttribute("Gender", 1);
    second.AddAttribute("Gender", 0);
    third.AddAttribute("Gender", 1);
    fourth.AddAttribute("Gender", 1);

    points.AddItem(first);
    points.AddItem(second);
    points.AddItem(third);
    points.AddItem(fourth);

    var algorithm = new KMeans(points, new [] { 0, 1 }, new EuclideanMetric());
    var outcome = algorithm.Calculate();

    var membersOfFirstCluster = outcome.Clusters[0].Members.Select(entry => entry.Member).ToArray();
    var membersOfSecondCluster = outcome.Clusters[1].Members.Select(entry => entry.Member).ToArray();

    Assert.AreEqual(3, membersOfFirstCluster.Length);
    Assert.Contains(first, membersOfFirstCluster);
    Assert.Contains(third, membersOfFirstCluster);
    Assert.Contains(fourth, membersOfFirstCluster);

    Assert.AreEqual(1, membersOfSecondCluster.Length);
    Assert.Contains(second, membersOfSecondCluster);
}
/// <summary>
/// Fills the details grid with one row per attribute of the given data point,
/// comparing each coordinate against the matching coordinate of the centroid.
/// </summary>
/// <param name="dataPoint">Data point whose attributes, original values and coordinates are listed.</param>
/// <param name="centroid">Centroid whose coordinates are passed to <c>PercentageCalculator</c> alongside the point's.</param>
public void GenerateDetails(IdentifiableDataPoint dataPoint, DataPoint centroid)
{
    dataGridView1.Visible = true;
    dataPointInfoList = new List<DataPointInfo>();
    lblID.Text = "ID: " + dataPoint.Id.ToString();
    dataGridView1.AutoGenerateColumns = false;

    int attributeCount = dataPoint.Attributes.Count;
    for (int i = 0; i < attributeCount; i++)
    {
        dataPointInfo = new DataPointInfo();
        dataPointInfo.Field = dataPoint.Attributes[i];
        dataPointInfo.Value = dataPoint.OriginalValues[i];
        dataPointInfo.Percent = PercentageCalculator(dataPoint.Coordinates[i], centroid.Coordinates[i]);
        dataPointInfo.Similarity = SimilarityCalculator(dataPointInfo.Percent);
        dataPointInfoList.Add(dataPointInfo);
    }

    // Ordering: rows whose Percent equals 200 go last (presumably a sentinel — confirm against
    // PercentageCalculator); the rest are sorted by Percent descending, ties broken by Value ascending.
    dataPointInfoList.Sort((item1, item2) =>
    {
        bool firstIs200 = item1.Percent == 200;
        bool secondIsNot200 = item2.Percent != 200;

        if (firstIs200 && secondIsNot200)
        {
            return 1;
        }
        if (item1.Percent != 200 && item2.Percent == 200)
        {
            return -1;
        }
        if (item1.Percent < item2.Percent)
        {
            return 1;
        }
        if (item1.Percent > item2.Percent)
        {
            return -1;
        }
        if (item1.Percent == item2.Percent)
        {
            int valueOrder = string.Compare(item1.Value, item2.Value);
            if (valueOrder > 0)
            {
                return 1;
            }
            return valueOrder < 0 ? -1 : 0;
        }

        // Reached only when none of the comparisons above holds.
        return 0;
    });

    dataGridView1.DataSource = dataPointInfoList;
}
/// <summary>
/// Builds a data point from the reader's current record by dispatching every configured
/// field to the parser that matches its <c>FieldType</c>.
/// </summary>
/// <param name="csv">Reader whose current record is being parsed.</param>
/// <param name="id">Identifier given to the created data point.</param>
/// <returns>The data point populated by the field parsers.</returns>
/// <exception cref="InvalidOperationException">A configured field has a type this method does not handle.</exception>
private IdentifiableDataPoint ParseRow(CsvReader csv, int id)
{
    var parsedPoint = new IdentifiableDataPoint(id, config.DimensionCount);

    foreach (IField currentField in config.Fields)
    {
        switch (currentField.Type)
        {
            case FieldType.Scalar:
                ParseScalarField(currentField, csv, parsedPoint);
                break;

            case FieldType.MultipleBinaryFields:
                ParseMultipleBinaryField(currentField, csv, parsedPoint);
                break;

            case FieldType.MultipleChoiceMultipleBinaryFields:
                ParseMultipleChoiceBinaryField(currentField, csv, parsedPoint);
                break;

            case FieldType.Numeric:
                ParseNumericField(currentField, csv, parsedPoint);
                break;

            default:
                throw new InvalidOperationException("Unknown field type.");
        }
    }

    return parsedPoint;
}
// A point constructed with 1 as its second argument accepts one attribute;
// adding a second one must throw NumberOfDimensionsExceededException.
public void AddAttributeShouldCheckUpperBound()
{
    var singleSlotPoint = new IdentifiableDataPoint(0, 1);
    singleSlotPoint.AddAttribute("Age", 27);

    Assert.Throws<NumberOfDimensionsExceededException>(
        () => singleSlotPoint.AddAttribute("Status", 1));
}
// Looking up an attribute name that was never added must throw an ArgumentException
// whose ParamName is "attributeName" and whose message mentions the missing name.
public void AccessingNonExistingAttributeNameShouldThrowException()
{
    var emptyPoint = new IdentifiableDataPoint(0, 5);

    var thrown = Assert.Throws<ArgumentException>(() => emptyPoint["Attr1"].ToString());

    Assert.AreEqual("attributeName", thrown.ParamName);
    Assert.IsTrue(thrown.Message.Contains("'Attr1' does not exists"));
}
// Two attributes sharing a name are both stored, each keeping its own position and value.
public void AttributesWithTheSameNameShouldBeAllowed()
{
    var point = new IdentifiableDataPoint(0, 5);

    point.AddAttribute("Attr1", 41);
    point.AddAttribute("Attr1", 42);

    // Names by position.
    Assert.AreEqual("Attr1", point.Attributes[0]);
    Assert.AreEqual("Attr1", point.Attributes[1]);

    // Values by position.
    Assert.AreEqual(41, point[0]);
    Assert.AreEqual(42, point[1]);
}
// The name-based indexer returns the value that was added under that attribute name.
public void IndexerShouldReturnCorrectValue()
{
    var point = new IdentifiableDataPoint(0, 5);

    point.AddAttribute("Attr1", 42);
    point.AddAttribute("Attr2", 24);
    point.AddAttribute("Attr3", 25);

    Assert.AreEqual(42, point["Attr1"]);
    Assert.AreEqual(24, point["Attr2"]);
    Assert.AreEqual(25, point["Attr3"]);
}
/// <summary>
/// Shows the details of the clicked point, provided a clustering result exists
/// and it contains a cluster for that point.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Clicked drawable point; its <c>Origin</c> is the data point shown.</param>
private void DataPointClicked(object sender, DrawableDataPoint e)
{
    // Nothing to show before a clustering result is available.
    if (clusterResult == null)
    {
        return;
    }

    IdentifiableDataPoint clicked = e.Origin;
    Cluster owningCluster = clusterResult.FindCluster(clicked);
    if (owningCluster == null)
    {
        return;
    }

    dataPointDetailsComponent1.GenerateDetails(clicked, owningCluster.Centroid);
}
// Attribute names are exposed in the order in which they were added.
public void ShouldSaveAttributesInOrder()
{
    var point = new IdentifiableDataPoint(0, 3);

    point.AddAttribute("Attr1", 27);
    point.AddAttribute("Attr2", 23);
    point.AddAttribute("Attr3", 24);

    Assert.AreEqual("Attr1", point.Attributes[0]);
    Assert.AreEqual("Attr2", point.Attributes[1]);
    Assert.AreEqual("Attr3", point.Attributes[2]);
}
/// <summary>
/// Translates the cell text of a scalar field into its numeric value, multiplies it by the
/// field weight and stores it on the data point under the field's category.
/// </summary>
/// <param name="field">Field definition supplying the column index, value lookup, weight and category.</param>
/// <param name="csv">Reader whose current record is being parsed.</param>
/// <param name="profile">Data point that receives the attribute.</param>
/// <exception cref="InvalidFieldValueException">The cell text has no numeric translation for this field.</exception>
private void ParseScalarField(IField field, CsvReader csv, IdentifiableDataPoint profile)
{
    string cellText = csv.GetField<string>(field.Index);

    double? translation = field.Values.GetDoubleValueFor(cellText);
    if (!translation.HasValue)
    {
        throw new InvalidFieldValueException(csv.Row, field.Index);
    }

    double weighted = translation.Value * field.Weight;

    // The untouched cell text is kept as the attribute's original value.
    profile.AddAttribute(field.Category, weighted, cellText);
}
// Clusters four five-attribute points with the index array { 0, 1, 2 } and checks the
// centroid coordinates of each of the three resulting clusters.
public void KMeansCentroidsArePlacedRightInFiveDimensions()
{
    var points = new IdentifiableDataPointCollection();

    var first = new IdentifiableDataPoint(0, 5);
    var second = new IdentifiableDataPoint(1, 5);
    var third = new IdentifiableDataPoint(2, 5);
    var fourth = new IdentifiableDataPoint(3, 5);

    first.AddAttribute("Gender", 1);
    first.AddAttribute("Income", 1);
    first.AddAttribute("Age", 0.16);
    first.AddAttribute("Purchase", 0.5);
    first.AddAttribute("Control", 1);

    second.AddAttribute("Gender", 0);
    second.AddAttribute("Income", 0.1429);
    second.AddAttribute("Age", 0.16);
    second.AddAttribute("Purchase", 1);
    second.AddAttribute("Control", 0);

    third.AddAttribute("Gender", 1);
    third.AddAttribute("Income", 0.2858);
    third.AddAttribute("Age", 0.16);
    third.AddAttribute("Purchase", 1);
    third.AddAttribute("Control", 1);

    fourth.AddAttribute("Gender", 1);
    fourth.AddAttribute("Income", 1);
    fourth.AddAttribute("Age", 0.16);
    fourth.AddAttribute("Purchase", 1);
    fourth.AddAttribute("Control", 0.5);

    points.AddItem(first);
    points.AddItem(second);
    points.AddItem(third);
    points.AddItem(fourth);

    var algorithm = new KMeans(points, new [] { 0, 1, 2 }, new EuclideanMetric());
    var outcome = algorithm.Calculate();

    // Cluster 0: per-coordinate mean of the first and fourth points.
    double[] expectedCluster0 = { 1, 1, 0.16, 0.75, 0.75 };
    // Cluster 1: the second point's coordinates.
    double[] expectedCluster1 = { 0, 0.1429, 0.16, 1, 0 };
    // Cluster 2: the third point's coordinates.
    double[] expectedCluster2 = { 1, 0.2858, 0.16, 1, 1 };

    Assert.AreEqual(expectedCluster0, outcome.Clusters[0].Centroid.Coordinates);
    Assert.AreEqual(expectedCluster1, outcome.Clusters[1].Centroid.Coordinates);
    Assert.AreEqual(expectedCluster2, outcome.Clusters[2].Centroid.Coordinates);
}
/// <summary>
/// Highlights the first chart point whose tagged <c>DrawableDataPoint</c> has the given
/// data point as its <c>Origin</c>. Does nothing when no chart point matches.
/// </summary>
/// <param name="inputPoint">Data point to locate among the chart's points.</param>
public void HighlightPoint(IdentifiableDataPoint inputPoint)
{
    foreach (var series in chart1.Series)
    {
        foreach (var chartPoint in series.Points)
        {
            // A chart point whose Tag is null or not a DrawableDataPoint cannot match;
            // skip it instead of failing on the cast or on a null dereference.
            var drawable = chartPoint.Tag as DrawableDataPoint;
            if (drawable == null)
            {
                continue;
            }

            if (inputPoint == drawable.Origin)
            {
                HighlightPoint(chartPoint);
                return;
            }
        }
    }
}
/// <summary>
/// Parses a numeric cell, normalizes it against the field's MinValue/MaxValue range,
/// multiplies it by the field weight and stores it on the data point under the field's category.
/// </summary>
/// <param name="field">Field definition supplying the column index, value range, weight and category.</param>
/// <param name="csv">Reader whose current record is being parsed.</param>
/// <param name="dataItem">Data point that receives the attribute.</param>
/// <exception cref="InvalidNumericValueException">The cell text cannot be parsed as a number with <c>parseCulture</c>.</exception>
private void ParseNumericField(IField field, CsvReader csv, IdentifiableDataPoint dataItem)
{
    string cellText = csv.GetField(field.Index);

    double parsedValue;
    if (!double.TryParse(cellText, NumberStyles.Any, parseCulture, out parsedValue))
    {
        throw new InvalidNumericValueException(csv.Row, field.Index);
    }

    // The stored original value is the parsed number re-formatted by ToString(), not the raw cell text.
    string originalValue = parsedValue.ToString();

    double range = field.MaxValue - field.MinValue;

    // A field whose MinValue equals MaxValue has a zero-width range; dividing by it would
    // store NaN or Infinity as a coordinate. Such a field is normalized to 0 instead.
    double normalizedValue = range == 0
        ? 0.0
        : (parsedValue - field.MinValue) / range;

    double finalValue = normalizedValue * field.Weight;
    dataItem.AddAttribute(field.Category, finalValue, originalValue);
}
/// <summary>
/// Reads every record from <c>reader</c> and converts each one into a data point.
/// Identifiers are assigned sequentially starting at 0, in reading order.
/// </summary>
/// <returns>A collection holding one data point per record read.</returns>
public IdentifiableDataPointCollection Run()
{
    var parsedPoints = new IdentifiableDataPointCollection();

    var csv = new CsvReader(reader);
    csv.Configuration.Delimiter = FieldDelimiter;

    for (var nextId = 0; csv.Read(); nextId++)
    {
        IdentifiableDataPoint parsedRow = ParseRow(csv, nextId);
        parsedPoints.AddItem(parsedRow);
    }

    return parsedPoints;
}
// Constructing KMeans with 0 as its second argument must throw an ArgumentException.
public void CalculateWithZeroClusters()
{
    var points = new IdentifiableDataPointCollection();

    var first = new IdentifiableDataPoint(0, 1);
    var second = new IdentifiableDataPoint(1, 1);
    var third = new IdentifiableDataPoint(2, 1);
    var fourth = new IdentifiableDataPoint(3, 1);

    first.AddAttribute("Gender", 1);
    second.AddAttribute("Gender", 0);
    third.AddAttribute("Gender", 1);
    fourth.AddAttribute("Gender", 1);

    points.AddItem(first);
    points.AddItem(second);
    points.AddItem(third);
    points.AddItem(fourth);

    Assert.Throws<ArgumentException>(
        () => new KMeans(points, 0, new EuclideanMetric()));
}
// The points with ids 0 and 1 have identical attribute values; constructing KMeans with the
// index array { 0, 1 } (presumably the initial centroid indices) must throw InvalidOperationException.
public void CentroidsHaveDistinctValues()
{
    var points = new IdentifiableDataPointCollection();

    var first = new IdentifiableDataPoint(0, 2);
    var duplicateOfFirst = new IdentifiableDataPoint(1, 2);
    var third = new IdentifiableDataPoint(2, 2);

    first.AddAttribute("Gender", 1);
    first.AddAttribute("Income", 1);

    duplicateOfFirst.AddAttribute("Gender", 1);
    duplicateOfFirst.AddAttribute("Income", 1);

    third.AddAttribute("Gender", 0);
    third.AddAttribute("Income", 0);

    points.AddItem(first);
    points.AddItem(duplicateOfFirst);
    points.AddItem(third);

    Assert.Throws<InvalidOperationException>(
        () => new KMeans(points, new [] { 0, 1 }, new EuclideanMetric()));
}
/// <summary>
/// Handles a click on a data point in the outlier detection component: shows the point's
/// details when its cluster is known, and highlights the point in the scatter plot.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Data point that was clicked.</param>
private void outlierDetectionComponent1_DataPointClick(object sender, IdentifiableDataPoint e)
{
    // Same guards as DataPointClicked: details need a clustering result that contains
    // the point; without one, dereferencing would throw a NullReferenceException.
    if (clusterResult != null)
    {
        Cluster cluster = clusterResult.FindCluster(e);
        if (cluster != null)
        {
            dataPointDetailsComponent1.GenerateDetails(e, cluster.Centroid);
        }
    }

    // Highlighting does not depend on the cluster lookup.
    scatterPlotControl1.HighlightPoint(e);
}
// The identifier passed to the constructor is exposed unchanged through Id.
public void ShouldSaveIdAttribute()
{
    var point = new IdentifiableDataPoint(42, 1);

    Assert.AreEqual(42, point.Id);
}
/// <summary>
/// Highlights the chart point that corresponds to the given drawable point by delegating
/// to the overload that takes the drawable point's <c>Origin</c>.
/// </summary>
/// <param name="inputPoint">Drawable point whose origin should be highlighted.</param>
public void HighlightPoint(DrawableDataPoint inputPoint)
{
    // The explicitly typed local keeps the call bound to the IdentifiableDataPoint overload.
    IdentifiableDataPoint origin = inputPoint.Origin;

    HighlightPoint(origin);
}