// Clusters four points that each carry a Gender and an Income attribute and
// checks how many members end up in clusters 0, 1 and 2 of the result.
public void KMeansClusteringWorksOnTwoDimensions()
{
    // Arrange: every point is fully populated before it joins the collection.
    var points = new IdentifiableDataPointCollection();

    var first = new IdentifiableDataPoint(0, 2);
    first.AddAttribute("Gender", 1);
    first.AddAttribute("Income", 1);
    points.AddItem(first);

    var second = new IdentifiableDataPoint(1, 2);
    second.AddAttribute("Gender", 0);
    second.AddAttribute("Income", 0.1429);
    points.AddItem(second);

    var third = new IdentifiableDataPoint(2, 2);
    third.AddAttribute("Gender", 1);
    third.AddAttribute("Income", 0.2858);
    points.AddItem(third);

    var fourth = new IdentifiableDataPoint(3, 2);
    fourth.AddAttribute("Gender", 1);
    fourth.AddAttribute("Income", 1);
    points.AddItem(fourth);

    // Act
    // NOTE(review): the index array presumably selects the starting centroids - confirm against KMeans.
    var clustering = new KMeans(points, new[] { 0, 1, 2 }, new EuclideanMetric()).Calculate();

    // Assert: expected member counts, indexed by cluster position.
    int[] expectedSizes = { 2, 1, 1 };
    for (int i = 0; i < expectedSizes.Length; i++)
    {
        Assert.AreEqual(expectedSizes[i], clustering.Clusters[i].Members.Count);
    }
}
// A data point constructed with 1 as its second argument accepts one attribute;
// adding another is expected to raise NumberOfDimensionsExceededException.
public void AddAttributeShouldCheckUpperBound()
{
    // Arrange: fill the only available slot.
    var point = new IdentifiableDataPoint(0, 1);
    point.AddAttribute("Age", 27);

    // Act + Assert: one attribute too many.
    Assert.Throws<NumberOfDimensionsExceededException>(
        () => point.AddAttribute("Status", 1));
}
// Looking an attribute up by name through the indexer must yield the value
// that was stored under that name.
public void IndexerShouldReturnCorrectValue()
{
    // Arrange: three of the five available slots are used.
    var point = new IdentifiableDataPoint(0, 5);
    point.AddAttribute("Attr1", 42);
    point.AddAttribute("Attr2", 24);
    point.AddAttribute("Attr3", 25);

    // Assert
    Assert.AreEqual(42, point["Attr1"]);
    Assert.AreEqual(24, point["Attr2"]);
    Assert.AreEqual(25, point["Attr3"]);
}
// Two attributes may share a name: both names and both values must remain
// reachable by position after they were added.
public void AttributesWithTheSameNameShouldBeAllowed()
{
    // Arrange: the same name is used twice with different values.
    var point = new IdentifiableDataPoint(0, 5);
    point.AddAttribute("Attr1", 41);
    point.AddAttribute("Attr1", 42);

    // Assert: names by position.
    Assert.AreEqual("Attr1", point.Attributes[0]);
    Assert.AreEqual("Attr1", point.Attributes[1]);

    // Assert: values by position.
    Assert.AreEqual(41, point[0]);
    Assert.AreEqual(42, point[1]);
}
// Attribute names must be exposed in the same order in which they were added.
public void ShouldSaveAttributesInOrder()
{
    // Arrange
    var point = new IdentifiableDataPoint(0, 3);
    point.AddAttribute("Attr1", 27);
    point.AddAttribute("Attr2", 23);
    point.AddAttribute("Attr3", 24);

    // Assert: position i holds the i-th name that was added.
    string[] expectedNames = { "Attr1", "Attr2", "Attr3" };
    for (int i = 0; i < expectedNames.Length; i++)
    {
        Assert.AreEqual(expectedNames[i], point.Attributes[i]);
    }
}
// Clusters four points that carry five attributes each (Gender, Income, Age,
// Purchase, Control) and compares the centroid coordinates of clusters 0, 1
// and 2 with hand-written expectations.
public void KMeansCentroidsArePlacedRightInFiveDimensions()
{
    // Arrange: every point is fully populated before it joins the collection.
    var points = new IdentifiableDataPointCollection();

    var first = new IdentifiableDataPoint(0, 5);
    first.AddAttribute("Gender", 1);
    first.AddAttribute("Income", 1);
    first.AddAttribute("Age", 0.16);
    first.AddAttribute("Purchase", 0.5);
    first.AddAttribute("Control", 1);
    points.AddItem(first);

    var second = new IdentifiableDataPoint(1, 5);
    second.AddAttribute("Gender", 0);
    second.AddAttribute("Income", 0.1429);
    second.AddAttribute("Age", 0.16);
    second.AddAttribute("Purchase", 1);
    second.AddAttribute("Control", 0);
    points.AddItem(second);

    var third = new IdentifiableDataPoint(2, 5);
    third.AddAttribute("Gender", 1);
    third.AddAttribute("Income", 0.2858);
    third.AddAttribute("Age", 0.16);
    third.AddAttribute("Purchase", 1);
    third.AddAttribute("Control", 1);
    points.AddItem(third);

    var fourth = new IdentifiableDataPoint(3, 5);
    fourth.AddAttribute("Gender", 1);
    fourth.AddAttribute("Income", 1);
    fourth.AddAttribute("Age", 0.16);
    fourth.AddAttribute("Purchase", 1);
    fourth.AddAttribute("Control", 0.5);
    points.AddItem(fourth);

    // Act
    // NOTE(review): the index array presumably selects the starting centroids - confirm against KMeans.
    var clustering = new KMeans(points, new[] { 0, 1, 2 }, new EuclideanMetric()).Calculate();

    // Assert: expected coordinates, written in the order the attributes were added.
    double[] expectedCentroid0 = { 1, 1, 0.16, 0.75, 0.75 };
    double[] expectedCentroid1 = { 0, 0.1429, 0.16, 1, 0 };
    double[] expectedCentroid2 = { 1, 0.2858, 0.16, 1, 1 };

    Assert.AreEqual(expectedCentroid0, clustering.Clusters[0].Centroid.Coordinates);
    Assert.AreEqual(expectedCentroid1, clustering.Clusters[1].Centroid.Coordinates);
    Assert.AreEqual(expectedCentroid2, clustering.Clusters[2].Centroid.Coordinates);
}
/// <summary>
/// Expands one CSV cell into one attribute per entry of <c>field.Values</c>.
/// The entry whose name equals the cell text gets the value
/// <c>field.Weight / number of entries</c> and the original value "Yes";
/// every other entry gets 0 and "No".
/// </summary>
/// <param name="field">Field description supplying the column index, category, weight and possible values.</param>
/// <param name="csv">Reader positioned on the record being parsed.</param>
/// <param name="profile">Data point that receives the generated attributes.</param>
private void ParseMultipleBinaryField(IField field, CsvReader csv, IdentifiableDataPoint profile)
{
    string rawLabel = csv.GetField<string>(field.Index);
    string label = RemoveWhiteSpace ? rawLabel.Trim() : rawLabel;

    int optionCount = field.Values.Count();

    foreach (IFieldValue option in field.Values)
    {
        bool isSelected = option.Name.Equals(label);

        // The weight is shared across all possible values of the field.
        double value = isSelected ? field.Weight / optionCount : 0;
        string originalValue = isSelected ? "Yes" : "No";

        // Attribute names combine the field category with the option name.
        string name = String.Format("{0}: {1}", field.Category, option.Name);
        profile.AddAttribute(name, value, originalValue);
    }
}
/// <summary>
/// Expands one CSV cell holding several choices into one attribute per entry
/// of <c>field.Values</c>. The cell text is split on <c>ValueDelimiter</c> and
/// each piece is trimmed; entries whose name appears among the pieces get the
/// value <c>field.Weight / number of entries</c> and the original value "Yes",
/// all others get 0 and "No".
/// </summary>
/// <param name="field">Field description supplying the column index, category, weight and possible values.</param>
/// <param name="csv">Reader positioned on the record being parsed.</param>
/// <param name="profile">Data point that receives the generated attributes.</param>
private void ParseMultipleChoiceBinaryField(IField field, CsvReader csv, IdentifiableDataPoint profile)
{
    string rawLabel = csv.GetField<string>(field.Index);
    string label = RemoveWhiteSpace ? rawLabel.Trim() : rawLabel;

    // Individual choices are always trimmed, independent of RemoveWhiteSpace.
    string[] selectedNames = label
        .Split(ValueDelimiter)
        .Select(piece => piece.Trim())
        .ToArray();

    int optionCount = field.Values.Count();

    foreach (IFieldValue option in field.Values)
    {
        bool isSelected = selectedNames.Contains(option.Name);

        // The weight is shared across all possible values of the field.
        double value = isSelected ? field.Weight / optionCount : 0;
        string originalValue = isSelected ? "Yes" : "No";

        // Attribute names combine the field category with the option name.
        string name = String.Format("{0}: {1}", field.Category, option.Name);
        profile.AddAttribute(name, value, originalValue);
    }
}
// Clusters four points with a single Gender attribute (values 1, 0, 1, 1) and
// expects the centroid of cluster 0 at { 1 } and of cluster 1 at { 0 }.
public void KMeansShouldCalculateCorrectCentroidsInOneDimension()
{
    // Arrange: every point is fully populated before it joins the collection.
    var points = new IdentifiableDataPointCollection();

    var first = new IdentifiableDataPoint(0, 1);
    first.AddAttribute("Gender", 1);
    points.AddItem(first);

    var second = new IdentifiableDataPoint(1, 1);
    second.AddAttribute("Gender", 0);
    points.AddItem(second);

    var third = new IdentifiableDataPoint(2, 1);
    third.AddAttribute("Gender", 1);
    points.AddItem(third);

    var fourth = new IdentifiableDataPoint(3, 1);
    fourth.AddAttribute("Gender", 1);
    points.AddItem(fourth);

    // Act
    // NOTE(review): the index array presumably selects the starting centroids - confirm against KMeans.
    var clustering = new KMeans(points, new[] { 0, 1 }, new EuclideanMetric()).Calculate();

    // Assert
    double[] expectedCentroid0 = { 1 };
    double[] expectedCentroid1 = { 0 };

    Assert.AreEqual(expectedCentroid0, clustering.Clusters[0].Centroid.Coordinates);
    Assert.AreEqual(expectedCentroid1, clustering.Clusters[1].Centroid.Coordinates);
}
// Clusters four points with a single Gender attribute (values 1, 0, 1, 1) and
// expects the three points with value 1 in cluster 0 and the remaining point
// in cluster 1.
public void KMeansShouldClusterDataPointsInOneDimensionCorrectly()
{
    // Arrange: every point is fully populated before it joins the collection.
    var points = new IdentifiableDataPointCollection();

    var first = new IdentifiableDataPoint(0, 1);
    first.AddAttribute("Gender", 1);
    points.AddItem(first);

    var second = new IdentifiableDataPoint(1, 1);
    second.AddAttribute("Gender", 0);
    points.AddItem(second);

    var third = new IdentifiableDataPoint(2, 1);
    third.AddAttribute("Gender", 1);
    points.AddItem(third);

    var fourth = new IdentifiableDataPoint(3, 1);
    fourth.AddAttribute("Gender", 1);
    points.AddItem(fourth);

    // Act
    // NOTE(review): the index array presumably selects the starting centroids - confirm against KMeans.
    var clustering = new KMeans(points, new[] { 0, 1 }, new EuclideanMetric()).Calculate();

    // Unwrap the data points held by each cluster's member entries.
    var firstClusterPoints = clustering.Clusters[0].Members
        .Select(entry => entry.Member)
        .ToArray();
    var secondClusterPoints = clustering.Clusters[1].Members
        .Select(entry => entry.Member)
        .ToArray();

    // Assert: cluster 0 holds the three points with Gender = 1.
    Assert.AreEqual(3, firstClusterPoints.Length);
    Assert.Contains(first, firstClusterPoints);
    Assert.Contains(third, firstClusterPoints);
    Assert.Contains(fourth, firstClusterPoints);

    // Assert: cluster 1 holds the single point with Gender = 0.
    Assert.AreEqual(1, secondClusterPoints.Length);
    Assert.Contains(second, secondClusterPoints);
}
/// <summary>
/// Translates the CSV cell of a scalar field into a number through
/// <c>field.Values.GetDoubleValueFor</c>, multiplies it by <c>field.Weight</c>
/// and stores it on the data point under <c>field.Category</c>, keeping the
/// cell text as the original value.
/// </summary>
/// <param name="field">Field description supplying the column index, category, weight and value lookup.</param>
/// <param name="csv">Reader positioned on the record being parsed.</param>
/// <param name="profile">Data point that receives the attribute.</param>
/// <exception cref="InvalidFieldValueException">The lookup returned no value for the cell text.</exception>
private void ParseScalarField(IField field, CsvReader csv, IdentifiableDataPoint profile)
{
    string label = csv.GetField<string>(field.Index);
    double? translated = field.Values.GetDoubleValueFor(label);

    // Reject cell texts the lookup has no number for.
    if (translated == null)
    {
        throw new InvalidFieldValueException(csv.Row, field.Index);
    }

    double weighted = translated.Value * field.Weight;
    profile.AddAttribute(field.Category, weighted, label);
}
// Points 0 and 1 carry identical attribute values; constructing KMeans with
// the index array { 0, 1 } is expected to throw InvalidOperationException.
public void CentroidsHaveDistinctValues()
{
    // Arrange: two identical points followed by a different one.
    var points = new IdentifiableDataPointCollection();

    var first = new IdentifiableDataPoint(0, 2);
    first.AddAttribute("Gender", 1);
    first.AddAttribute("Income", 1);
    points.AddItem(first);

    var duplicate = new IdentifiableDataPoint(1, 2);
    duplicate.AddAttribute("Gender", 1);
    duplicate.AddAttribute("Income", 1);
    points.AddItem(duplicate);

    var different = new IdentifiableDataPoint(2, 2);
    different.AddAttribute("Gender", 0);
    different.AddAttribute("Income", 0);
    points.AddItem(different);

    // Act + Assert: the constructor itself must reject the setup.
    Assert.Throws<InvalidOperationException>(
        () => new KMeans(points, new[] { 0, 1 }, new EuclideanMetric()));
}
/// <summary>
/// Parses the CSV cell of a numeric field, rescales it relative to
/// <c>field.MinValue</c> and <c>field.MaxValue</c>, multiplies the result by
/// <c>field.Weight</c> and stores it on the data point under
/// <c>field.Category</c>.
/// </summary>
/// <param name="field">Field description supplying the column index, category, weight and value range.</param>
/// <param name="csv">Reader positioned on the record being parsed.</param>
/// <param name="dataItem">Data point that receives the attribute.</param>
/// <exception cref="InvalidNumericValueException">The cell text cannot be parsed as a number using <c>parseCulture</c>.</exception>
private void ParseNumericField(IField field, CsvReader csv, IdentifiableDataPoint dataItem)
{
    double valueInDataField;
    string val = csv.GetField(field.Index);
    if (!double.TryParse(val, NumberStyles.Any, parseCulture, out valueInDataField))
    {
        throw new InvalidNumericValueException(csv.Row, field.Index);
    }

    // NOTE(review): this formats with the current culture while parsing uses
    // parseCulture - confirm that the difference is intended.
    string originalValue = valueInDataField.ToString();

    double difference = field.MaxValue - field.MinValue;

    // A zero-width range (MaxValue == MinValue) would make the division below
    // produce NaN or +/-Infinity, which would then be stored as the attribute
    // value. Store 0 for that degenerate case instead.
    double normalizedValue = difference == 0
        ? 0
        : (valueInDataField - field.MinValue) / difference;

    double finalValue = normalizedValue * field.Weight;
    dataItem.AddAttribute(field.Category, finalValue, originalValue);
}
// Constructing KMeans with 0 as its second argument is expected to throw
// ArgumentException.
public void CalculateWithZeroClusters()
{
    // Arrange: four points with a single Gender attribute each.
    var points = new IdentifiableDataPointCollection();

    var first = new IdentifiableDataPoint(0, 1);
    first.AddAttribute("Gender", 1);
    points.AddItem(first);

    var second = new IdentifiableDataPoint(1, 1);
    second.AddAttribute("Gender", 0);
    points.AddItem(second);

    var third = new IdentifiableDataPoint(2, 1);
    third.AddAttribute("Gender", 1);
    points.AddItem(third);

    var fourth = new IdentifiableDataPoint(3, 1);
    fourth.AddAttribute("Gender", 1);
    points.AddItem(fourth);

    // Act + Assert: the constructor itself must reject the argument.
    Assert.Throws<ArgumentException>(
        () => new KMeans(points, 0, new EuclideanMetric()));
}